diff --git a/docs/getting_started/quickstart/quickstart_nlp.ipynb b/docs/getting_started/quickstart/quickstart_nlp.ipynb index 472f3c7049..dd6353812e 100644 --- a/docs/getting_started/quickstart/quickstart_nlp.ipynb +++ b/docs/getting_started/quickstart/quickstart_nlp.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "! pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { @@ -329,7 +329,8 @@ "metadata": {}, "source": [ "If you are running in a notebook, you can display the scan report directly in the notebook using `display(...)`, otherwise you can export the report to an HTML file. Check the [API Reference](https://docs.giskard.ai/en/latest/reference/scan/report.html#giskard.scanner.report.ScanReport) for more details on the export methods available on the `ScanReport` class." - ] + ], + "id": "9dd5baaaa6a7ee62" }, { "cell_type": "code", diff --git a/docs/getting_started/quickstart/quickstart_tabular.ipynb b/docs/getting_started/quickstart/quickstart_tabular.ipynb index af5f42d7ad..ebfbea632e 100644 --- a/docs/getting_started/quickstart/quickstart_tabular.ipynb +++ b/docs/getting_started/quickstart/quickstart_tabular.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "! pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { @@ -294,7 +294,8 @@ "metadata": {}, "source": [ "If you are running in a notebook, you can display the scan report directly in the notebook using `display(...)`, otherwise you can export the report to an HTML file. Check the [API Reference](https://docs.giskard.ai/en/latest/reference/scan/report.html#giskard.scanner.report.ScanReport) for more details on the export methods available on the `ScanReport` class." 
- ] + ], + "id": "28272f36e73f8a76" }, { "cell_type": "code", diff --git a/docs/open_source/customize_tests/data_slices/index.md b/docs/open_source/customize_tests/data_slices/index.md index ec075f486e..03ed170b17 100644 --- a/docs/open_source/customize_tests/data_slices/index.md +++ b/docs/open_source/customize_tests/data_slices/index.md @@ -12,7 +12,7 @@ This section explains how to create your own slicing function, or customize the The [Giskard catalog](../../catalogs/slicing-function-catalog/index.rst) provides you with different slicing functions for NLP such as sentiment, hate, and toxicity detectors: -``` +```python #Load sentiment analysis model from the Giskard catalog from giskard.ml_worker.testing.functions.slicing import positive_sentiment_analysis ``` @@ -27,17 +27,20 @@ To create a Giskard slicing function, you just need to decorate an existing Pyth When `row_level=True`, you can decorate a function that takes a pandas dataframe **row** as input and returns a boolean. Make sure that the first argument of your function corresponds to the row you want to filter: -``` -from giskard import slicing_function, demo +```python import pandas as pd +from giskard import slicing_function, demo, Dataset + _, df = demo.titanic() dataset = Dataset(df=df, target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) + @slicing_function(row_level=True) def my_func2(row: pd.Series, threshold: int): return row['Age'] > threshold + dataset.slice(my_func2(threshold=20)) ``` @@ -47,18 +50,21 @@ dataset.slice(my_func2(threshold=20)) When `row_level=False`, you can decorate a function that takes a full **pandas dataframe** as input and returns a filtered pandas dataframe. 
Make sure that the first argument of your function corresponds to the pandas dataframe you want to filter: -``` -from giskard import slicing_function, demo +```python +from giskard import slicing_function, demo, Dataset import pandas as pd + _, df = demo.titanic() dataset = Dataset(df=df, target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) + @slicing_function(row_level=False) def my_func1(df: pd.DataFrame, threshold: int): df['Age'] = df['Age'] > threshold return df + dataset.slice(my_func1(threshold=20)) ``` @@ -68,18 +74,20 @@ dataset.slice(my_func1(threshold=20)) When `cell_level=True` (False by default), you can decorate a function that takes a **value** (string, numeric or text) as an argument and returns a boolean. Make sure that the first argument of your function corresponds to the value and that the second argument defines the **column name** where you want to filter the value: -``` -from giskard import slicing_function, demo -import pandas as pd +```python +from giskard import slicing_function, demo, Dataset + _, df = demo.titanic() dataset = Dataset(df=df, target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) + @slicing_function(cell_level=True) def my_func3(cell: int, threshold: int): - return cell>threshold + return cell > threshold + -train_df.slice(my_func3(threshold=20), column_name='Age') +dataset.slice(my_func3(threshold=20), column_name='Age') ``` :::: @@ -89,7 +97,11 @@ train_df.slice(my_func3(threshold=20), column_name='Age') Slicing functions can be very powerful to detect complex behaviour when they are used as fixtures inside your test suite. With the Giskard framework you can easily create complex slicing functions. 
For instance: -``` +```python +import pandas as pd +from giskard import slicing_function + + def _sentiment_analysis(x, column_name, threshold, model, emotion): from transformers import pipeline sentiment_pipeline = pipeline("sentiment-analysis", model=model) @@ -98,6 +110,7 @@ def _sentiment_analysis(x, column_name, threshold, model, emotion): return x.iloc[list( map(lambda s: s['label'] == emotion and s['score'] >= threshold, sentiment_pipeline(sentences)))] + @slicing_function(name="Emotion sentiment", row_level=False, tags=["sentiment", "text"]) def emotion_sentiment_analysis(x: pd.DataFrame, column_name: str, emotion: str, threshold: float = 0.9) -> pd.DataFrame: """ @@ -110,15 +123,16 @@ def emotion_sentiment_analysis(x: pd.DataFrame, column_name: str, emotion: str, Giskard enables you to automatically generate the slicing functions that are the most insightful for your ML models. You can easily extract the results of the [scan feature](../scan/index.rst) using the following code: -``` -from giskard import Dataset, Model +```python +from giskard import Dataset, Model, scan + my_dataset = Dataset(...) my_model = Model(...) 
-scan_result = giskard.scan(my_model, my_dataset) +scan_result = scan(my_model, my_dataset) test_suite = scan_result.generate_test_suite("My first test suite") -test_suite.run()[1] +test_suite.run() ``` ## Upload your slicing function to the Giskard hub diff --git a/docs/open_source/customize_tests/data_transformations/index.md b/docs/open_source/customize_tests/data_transformations/index.md index 81659fcb32..ad39c47c8e 100644 --- a/docs/open_source/customize_tests/data_transformations/index.md +++ b/docs/open_source/customize_tests/data_transformations/index.md @@ -11,8 +11,8 @@ This section explains how to create your own transformation function, or customi The [Giskard catalog](../../catalogs/transformation-function-catalog/index.rst) provides you with different transformation functions for NLP use cases such as *adding typos*, or *punctuation stripping*. -``` -#Import keyboard typo transformations +```python +# Import keyboard typo transformations from giskard.ml_worker.testing.functions.transformation import keyboard_typo_transformation ``` @@ -26,18 +26,21 @@ To create a Giskard transformation function, you just need to decorate an existi When `row_level=True`, you can decorate a function that takes a pandas dataframe **row** as input, and returns a boolean. 
Make sure that the first argument of your function corresponds to the row you want to filter: -``` -from giskard import transformation_function, demo +```python import pandas as pd +from giskard import transformation_function, demo, Dataset + _, my_df = demo.titanic() dataset = Dataset(df=my_df, target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) + @transformation_function(row_level=True) def my_func2(row: pd.Series, offset: int): row['Age'] = row['Age'] + offset return row + transformed_dataset = dataset.transform(my_func2(offset=20)) ``` @@ -47,18 +50,21 @@ transformed_dataset = dataset.transform(my_func2(offset=20)) When `row_level=False`, you can decorate a function that takes a full **pandas dataframe** as input, and returns a filtered pandas dataframe. Make sure that the first argument of your function corresponds to the pandas dataframe you want to filter: -``` -from giskard import transformation_function, demo +```python import pandas as pd +from giskard import transformation_function, demo, Dataset + _, df = demo.titanic() dataset = Dataset(df=df, target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) + @transformation_function(row_level=False) def my_func1(df: pd.DataFrame, offset: int): df['Age'] = df['Age'] + offset return df + transformed_dataset = dataset.transform(my_func1(offset=20)) ``` @@ -68,17 +74,19 @@ transformed_dataset = dataset.transform(my_func1(offset=20)) When `cell_level=True` (False by default), you can decorate a function that takes as argument a **value** (string, numeric or text), and returns a boolean. 
Make sure that the first argument of your function corresponds to the value, and that the second argument defines the **column name** where you want to filter the value: -``` -from giskard import transformation_function, demo -import pandas as pd +```python +from giskard import transformation_function, demo, Dataset + _, df = demo.titanic() dataset = Dataset(df=df, target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) + @transformation_function(cell_level=True) def my_func3(cell: int, offset: int): return cell + offset + transformed_dataset = dataset.transform(my_func3(offset=20), column_name='Age') ``` @@ -89,11 +97,22 @@ transformed_dataset = dataset.transform(my_func3(offset=20), column_name='Age') Transformation functions can be very powerful to detect complex behaviour when they are used as fixtures inside your test suite. With the Giskard framework you can easily create complex transformation functions. For example: -``` +```python +import os +import pandas as pd +from giskard import transformation_function + + @transformation_function(name="Change writing style", row_level=False, tags=['text']) -def change_writing_style(x: pd.DataFrame, index: int, column_name: str, style: str, - OPENAI_API_KEY: str) -> pd.DataFrame: - os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY +def change_writing_style( + x: pd.DataFrame, + index: int, + column_name: str, + style: str, + openai_api_key: str +) -> pd.DataFrame: + os.environ["OPENAI_API_KEY"] = openai_api_key + rewrite_prompt_template = """ As a text rewriting robot, your task is to rewrite a given text using a specified rewriting style. 
You will receive a prompt with the following format: ``` @@ -115,10 +134,9 @@ def change_writing_style(x: pd.DataFrame, index: int, column_name: str, style: s ``` """ - from langchain import PromptTemplate - from langchain import LLMChain - from langchain import OpenAI + from langchain import PromptTemplate, LLMChain, OpenAI + rewrite_prompt = PromptTemplate(input_variables=['text', 'style'], template=rewrite_prompt_template) chain_rewrite = LLMChain(llm=OpenAI(), prompt=rewrite_prompt) @@ -129,15 +147,16 @@ def change_writing_style(x: pd.DataFrame, index: int, column_name: str, style: s Giskard enables you to automatically generate the transformation functions that are the most insightful for your ML models. You can easily extract the results of the [scan feature](../scan/index.rst) using the following code: -``` -from giskard import Dataset, Model +```python +from giskard import Dataset, Model, scan + my_dataset = Dataset(...) my_model = Model(...) -scan_result = giskard.scan(my_model, my_dataset) +scan_result = scan(my_model, my_dataset) test_suite = scan_result.generate_test_suite("My first test suite") -test_suite.run()[1] +test_suite.run() ``` ## Save your transformation function diff --git a/docs/open_source/customize_tests/test_model/index.md b/docs/open_source/customize_tests/test_model/index.md index 914d9d1438..3fc776d055 100644 --- a/docs/open_source/customize_tests/test_model/index.md +++ b/docs/open_source/customize_tests/test_model/index.md @@ -27,8 +27,9 @@ Testing for drift enables you to make sure your model is still valid by checking Thanks to Giskard, your drift tests can **focus on specific data slices** by passing a [slicing function](../data_slices/index.md) (generated by the scan for example) as a parameter of your test. 
```python -from giskard import demo, Model, Dataset, testing, slicing_function import pandas as pd +from giskard import demo, Model, Dataset, testing, slicing_function + model, df = demo.titanic() @@ -36,6 +37,7 @@ wrapped_model = Model(model=model, model_type="classification") train_df = Dataset(df=df.head(400), target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) test_df = Dataset(df=df.tail(400), target="Survived", cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]) + # Create a slicing function on females to create more domain-specific tests @slicing_function(name="females") def female_slice(row: pd.Series): @@ -61,8 +63,9 @@ print(f"Metric: {result.metric}") Performance tests are probably the most well-known by data scientists. Using Giskard you can focus your performance tests on specific data slices by passing a [slicing function](../data_slices/index.md) (generated by the scan for example) as a parameter of your test. To know more about Giskard performance tests, check out our [performance catalog](../../../reference/tests/performance.rst). ```python -from giskard import demo, Model, Dataset, testing, slicing_function import pandas as pd +from giskard import demo, Model, Dataset, testing, slicing_function + model, df = demo.titanic() @@ -73,11 +76,13 @@ wrapped_dataset = Dataset( cat_columns=["Pclass", "Sex", "SibSp", "Parch", "Embarked"], ) + # Create a slicing function on females to create more domain-specific tests @slicing_function(name="females") def female_slice(row: pd.Series): return row["Sex"] == "female" + result = testing.test_f1( dataset=wrapped_dataset, model=wrapped_model, @@ -95,7 +100,7 @@ With Giskard, creating metamorphic tests becomes simple. 
You can automatically g ```python from giskard import demo, Model, Dataset, testing, transformation_function -import pandas as pd + model, df = demo.titanic() @@ -106,12 +111,14 @@ wrapped_dataset = Dataset( cat_columns=["Pclass", "Sex", "SibSp", "Parch", "Embarked"], ) + # Increase the age by 10% to check if we have more "survived" probability @transformation_function(name="increase age") def increase_age(row): - row["Age"] = row["Age"] * 0.1 + row["Age"] = row["Age"] * 1.1 return row + result = testing.test_metamorphic_invariance( model=wrapped_model, dataset=wrapped_dataset, @@ -128,6 +135,7 @@ Statistical tests enable you to write some heuristics on the behavior of the mod ```python from giskard import demo, Model, Dataset, testing + model, df = demo.titanic() wrapped_model = Model(model=model, model_type="classification") @@ -159,6 +167,7 @@ If the test you want to create is not in the Giskard catalog, you can easily wri ```python from giskard import demo, test, Dataset, TestResult, testing + #Creating a data quality test checking if the frequency of a category is under a threshold @test(name="My Example", tags=["quality", "custom"]) def uniqueness_test_function(dataset: Dataset, @@ -170,6 +179,7 @@ def uniqueness_test_function(dataset: Dataset, return TestResult(passed=passed, metric=freq_of_cat) + #Now let's run this test to check if the frequency of "female" is under 70% _, df = demo.titanic() @@ -178,6 +188,7 @@ wrapped_dataset = Dataset( target="Survived", cat_columns=["Pclass", "Sex", "SibSp", "Parch", "Embarked"], ) + uniqueness_test_function(dataset=wrapped_dataset, column_name = "Sex", category="female", @@ -207,7 +218,9 @@ Test suites are a key feature of Giskard. Executing test suites can be useful fo Having the model as suite input enables you to compare models while in development (ex: to fine tune your model) or during production (ex: to automate retraining process). In the example below, we create a suite with two simple performance tests. 
As you see below, we specify all the test parameters **except the model to "expose" it as the suite input**: ```python -from giskard import demo, Model, Dataset, testing, Suite +import pandas as pd +from giskard import demo, Model, Dataset, testing, Suite, slicing_function + model, df = demo.titanic() @@ -244,13 +257,15 @@ suite = ( my_first_model = Model(model=model, model_type="classification") # Run the suite by specifying our model and display the results -passed_first, results_first = suite.run(model=my_first_model) +suite_results = suite.run(model=my_first_model) +passed_first, results_first = suite_results.passed, suite_results.results # Create an improved version of our model my_improved_model = Model(model=model, model_type="classification") # Run the suite with our new version and check if the results improved -passed_second, results_second = suite.run(model=my_improved_model) +suite_results = suite.run(model=my_improved_model) +passed_second, results_second = suite_results.passed, suite_results.results ``` ::: @@ -261,15 +276,8 @@ Having the dataset as suite input enables you to follow the behavior of differen ```python import pandas as pd -from giskard import ( - demo, - Model, - Dataset, - testing, - Suite, - transformation_function, - slicing_function, -) +from giskard import demo, Model, Dataset, testing, Suite, slicing_function + model, df = demo.titanic() @@ -295,7 +303,7 @@ suite = ( .add_test( testing.test_drift_prediction_ks( model=wrapped_model, - slicing_function=slice_female, + slicing_function=slice_sex, reference_dataset=golden, classification_label="yes", ) @@ -316,7 +324,8 @@ batch_1 = Dataset( ) # Run the suite by specifying our model and display the results -passed_1, results_1 = suite.run(actual_dataset=batch_1) +suite_results = suite.run(actual_dataset=batch_1) +passed_1, results_1 = suite_results.passed, suite_results.results # batch_2 can be a second batch of production data batch_2 = Dataset( @@ -326,7 +335,8 @@ batch_2 = Dataset( 
) # Run the suite with our new version and check if the results improved -passed_2, results_2 = suite.run(actual_dataset=batch_2) +suite_results = suite.run(actual_dataset=batch_2) +passed_2, results_2 = suite_results.passed, suite_results.results ``` ::: @@ -339,17 +349,9 @@ For advanced cases, you may need to define some test inputs that are shared betw In the example below, the data slice `female` is shared between two performance tests: ```python -from giskard import ( - demo, - Model, - Dataset, - testing, - Suite, - SuiteInput, - slicing_function, - SlicingFunction, -) import pandas as pd +from giskard import demo, Model, Dataset, testing, Suite, SuiteInput, slicing_function, SlicingFunction + model, df = demo.titanic() diff --git a/docs/reference/notebooks/LLM_Description_Product.ipynb b/docs/reference/notebooks/LLM_Description_Product.ipynb index dcd1f45dd8..6d2ea62750 100644 --- a/docs/reference/notebooks/LLM_Description_Product.ipynb +++ b/docs/reference/notebooks/LLM_Description_Product.ipynb @@ -64,7 +64,7 @@ "execution_count": null, "outputs": [], "source": [ - "!pip install \"giskard[llm]\" --upgrade" + "%pip install \"giskard[llm]\" --upgrade" ], "metadata": { "collapsed": false diff --git a/docs/reference/notebooks/LLM_Newspaper_Comment_Generation.ipynb b/docs/reference/notebooks/LLM_Newspaper_Comment_Generation.ipynb index ce4d3c76cb..27ac310821 100644 --- a/docs/reference/notebooks/LLM_Newspaper_Comment_Generation.ipynb +++ b/docs/reference/notebooks/LLM_Newspaper_Comment_Generation.ipynb @@ -53,7 +53,7 @@ }, "outputs": [], "source": [ - "!pip install \"giskard[llm]\" --upgrade" + "%pip install \"giskard[llm]\" --upgrade" ] }, { @@ -73,7 +73,7 @@ }, "outputs": [], "source": [ - "!pip install openai --upgrade" + "%pip install \"openai<1\" --upgrade" ] }, { @@ -101,8 +101,8 @@ "\n", "import openai\n", "import pandas as pd\n", - "from langchain.llms import OpenAI\n", "from langchain import PromptTemplate, LLMChain\n", + "from langchain.llms import 
OpenAI\n", "\n", "from giskard import Dataset, Model, scan, GiskardClient" ] @@ -322,7 +322,7 @@ "source": [ "giskard_model = Model(\n", " model=chain,\n", - " model_type=\"text_generation\", \n", + " model_type=\"text_generation\",\n", " name=\"Comment generation\",\n", " description=\"This model is a professional newspapers commentator.\",\n", " feature_names=[TEXT_COLUMN_NAME]\n", diff --git a/docs/reference/notebooks/LLM_QA_Documentation.ipynb b/docs/reference/notebooks/LLM_QA_Documentation.ipynb index a57dd53134..583a7c499f 100644 --- a/docs/reference/notebooks/LLM_QA_Documentation.ipynb +++ b/docs/reference/notebooks/LLM_QA_Documentation.ipynb @@ -54,7 +54,7 @@ }, "outputs": [], "source": [ - "!pip install \"giskard[llm]\" --upgrade" + "%pip install \"giskard[llm]\" --upgrade" ] }, { @@ -78,7 +78,7 @@ }, "outputs": [], "source": [ - "!pip install openai unstructured pdf2image pdfminer-six faiss-cpu" + "%pip install openai unstructured pdf2image pdfminer-six faiss-cpu" ] }, { diff --git a/docs/reference/notebooks/LLM_QA_Google.ipynb b/docs/reference/notebooks/LLM_QA_Google.ipynb index ed1a9c3b4d..b9e144e9f4 100644 --- a/docs/reference/notebooks/LLM_QA_Google.ipynb +++ b/docs/reference/notebooks/LLM_QA_Google.ipynb @@ -45,7 +45,7 @@ "execution_count": 1, "outputs": [], "source": [ - "!pip install \"giskard[llm]\" --upgrade" + "%pip install \"giskard[llm]\" --upgrade" ], "metadata": { "collapsed": false, @@ -69,7 +69,7 @@ "execution_count": 2, "outputs": [], "source": [ - "!pip install qdrant-client" + "%pip install qdrant-client" ], "metadata": { "collapsed": false, @@ -94,8 +94,8 @@ "execution_count": null, "outputs": [], "source": [ - "! wget https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/vector_databases/qdrant/docker-compose.yaml -O docker-compose.yaml \n", - "! 
docker-compose up -d; curl http://localhost:6333" + "!wget https://raw.githubusercontent.com/openai/openai-cookbook/main/examples/vector_databases/qdrant/docker-compose.yaml -O docker-compose.yaml \n", + "!docker-compose up -d; curl http://localhost:6333" ], "metadata": { "collapsed": false diff --git a/docs/reference/notebooks/LLM_QA_IPCC.ipynb b/docs/reference/notebooks/LLM_QA_IPCC.ipynb index 13503c7d98..fdeffdf76c 100644 --- a/docs/reference/notebooks/LLM_QA_IPCC.ipynb +++ b/docs/reference/notebooks/LLM_QA_IPCC.ipynb @@ -54,7 +54,7 @@ }, "outputs": [], "source": [ - "!pip install \"giskard[llm]\" --upgrade" + "%pip install \"giskard[llm]\" --upgrade" ] }, { @@ -78,7 +78,7 @@ }, "outputs": [], "source": [ - "!pip install \"langchain<=0.0.301\" \"pypdf<=3.17.0\" \"faiss-cpu<=1.7.4\" \"openai<=0.28.1\" \"tiktoken<=0.5.1\"" + "%pip install \"langchain<=0.0.301\" \"pypdf<=3.17.0\" \"faiss-cpu<=1.7.4\" \"openai<=0.28.1\" \"tiktoken<=0.5.1\"" ] }, { diff --git a/docs/reference/notebooks/LLM_QA_Winter_Olympics.ipynb b/docs/reference/notebooks/LLM_QA_Winter_Olympics.ipynb index abc947d8c9..315a3b9b11 100644 --- a/docs/reference/notebooks/LLM_QA_Winter_Olympics.ipynb +++ b/docs/reference/notebooks/LLM_QA_Winter_Olympics.ipynb @@ -49,7 +49,7 @@ "execution_count": 1, "outputs": [], "source": [ - "!pip install \"giskard[llm]\" --upgrade" + "%pip install \"giskard[llm]\" --upgrade" ], "metadata": { "collapsed": false, @@ -75,7 +75,7 @@ "execution_count": 2, "outputs": [], "source": [ - "!pip install openai tiktoken ast" + "%pip install openai tiktoken ast" ], "metadata": { "collapsed": false, diff --git a/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb b/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb index df26c09114..f8a83dc9b7 100644 --- a/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb +++ b/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "!pip install 
giskard --upgrade" + "%pip install giskard --upgrade" ] }, { @@ -68,7 +68,7 @@ "execution_count": null, "outputs": [], "source": [ - "!pip install accelerate --upgrade" + "%pip install accelerate --upgrade" ], "metadata": { "collapsed": false diff --git a/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb b/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb index aea12ed26d..8444b9f271 100644 --- a/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb +++ b/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/api_model.ipynb b/docs/reference/notebooks/api_model.ipynb index 7985b7f694..70f267e5fc 100644 --- a/docs/reference/notebooks/api_model.ipynb +++ b/docs/reference/notebooks/api_model.ipynb @@ -1,147 +1,148 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "f6dc35b9", - "metadata": {}, - "source": [ - "# Connecting Giskard to an AI model hosted by API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0361f8a8", - "metadata": {}, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "fd6208a2", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import requests\n", - "\n", - "import giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a30c144", - "metadata": {}, - "outputs": [], - "source": [ - "def prediction_function(input_data: pd.DataFrame):\n", - " # Set up the API endpoint URL and parameters\n", - " api_endpoint = \"https://api.example.com/predict\"\n", - " api_params = {\"input\": input_data}\n", - "\n", - " # Send a GET request to the API endpoint and get the response\n", - " response = requests.get(api_endpoint, params=api_params)\n", - "\n", - 
" # Raise error if an error has occurred\n", - " response.raise_for_status()\n", - "\n", - " # Extract the predictions from the JSON response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23e3f716", - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap your model with Giskard.Model:\n", - "giskard_model = giskard.Model(\n", - " model=prediction_function,\n", - " model_type=\"classification\",\n", - " feature_names=[\"feature1\", \"feature2\", \"feature3\"],\n", - " classification_labels=[\"label1\", \"label2\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5028344d", - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap your Pandas DataFrame with Giskard.Dataset, containing examples such as:\n", - "giskard_dataset = giskard.Dataset(df=..., target=\"target\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1b5f71b", - "metadata": {}, - "outputs": [], - "source": [ - "# Then apply the scan\n", - "results = giskard.scan(giskard_model, giskard_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e3bbe54", - "metadata": {}, - "outputs": [], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard hub\n", - "\n", - "If you are using Google Colab and you want to install the Giskard hub **locally**, you can run the Giskard hub by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard hub start\n", - "\n", - "Once the Giskard hub is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard hub expose --ngrok-token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard hub\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "f6dc35b9", + "metadata": {}, + "source": [ + "# Connecting Giskard to an AI model hosted by API" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "id": "0361f8a8", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install giskard -U" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fd6208a2", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import requests\n", + "\n", + "import giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a30c144", + "metadata": {}, + "outputs": [], + "source": [ + "def prediction_function(input_data: pd.DataFrame):\n", + " # Set up the API endpoint URL and parameters\n", + " api_endpoint = \"https://api.example.com/predict\"\n", + " api_params = {\"input\": input_data}\n", + "\n", + " # Send a GET request to the API endpoint and get the response\n", + " response = requests.get(api_endpoint, params=api_params)\n", + "\n", + " # Raise error if an error has occurred\n", + " response.raise_for_status()\n", + "\n", + " # Extract the predictions from the JSON response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e3f716", + "metadata": {}, + "outputs": [], + "source": [ + "# Wrap your model with Giskard.Model:\n", + "giskard_model = giskard.Model(\n", + " model=prediction_function,\n", + " model_type=\"classification\",\n", + " feature_names=[\"feature1\", \"feature2\", \"feature3\"],\n", + " classification_labels=[\"label1\", 
\"label2\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5028344d", + "metadata": {}, + "outputs": [], + "source": [ + "# Wrap your Pandas DataFrame with Giskard.Dataset, containing examples such as:\n", + "giskard_dataset = giskard.Dataset(df=..., target=\"target\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1b5f71b", + "metadata": {}, + "outputs": [], + "source": [ + "# Then apply the scan\n", + "results = giskard.scan(giskard_model, giskard_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e3bbe54", + "metadata": {}, + "outputs": [], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard hub\n", + "\n", + "If you are using Google Colab and you want to install the Giskard hub **locally**, you can run the Giskard hub by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard hub start\n", + "\n", + "Once the Giskard hub is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard hub expose --ngrok-token \\\n", + "\n", + "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard hub\n", + "
" + ], + "id": "8e70afa73d2dcba1" + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/reference/notebooks/cancer_detection_xgboost.ipynb b/docs/reference/notebooks/cancer_detection_xgboost.ipynb index 8ad79fe3b9..d8b1e83afd 100644 --- a/docs/reference/notebooks/cancer_detection_xgboost.ipynb +++ b/docs/reference/notebooks/cancer_detection_xgboost.ipynb @@ -53,7 +53,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { @@ -87,14 +87,14 @@ ] }, { - "cell_type": "raw", + "cell_type": "markdown", "source": [ "## Define constants" ], "metadata": { "collapsed": false }, - "id": "c78b3697f9c3be5d" + "id": "9dac8b68bec87e9d" }, { "cell_type": "code", diff --git a/docs/reference/notebooks/churn_prediction_lgbm.ipynb b/docs/reference/notebooks/churn_prediction_lgbm.ipynb index 8e9bfe4ee4..3a15354eb0 100644 --- a/docs/reference/notebooks/churn_prediction_lgbm.ipynb +++ b/docs/reference/notebooks/churn_prediction_lgbm.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/credit_scoring.ipynb b/docs/reference/notebooks/credit_scoring.ipynb index da77e4275e..8b30c1ef37 100644 --- a/docs/reference/notebooks/credit_scoring.ipynb +++ b/docs/reference/notebooks/credit_scoring.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/drug_classification_sklearn.ipynb 
b/docs/reference/notebooks/drug_classification_sklearn.ipynb index 058f8c7b1b..789b29dd82 100644 --- a/docs/reference/notebooks/drug_classification_sklearn.ipynb +++ b/docs/reference/notebooks/drug_classification_sklearn.ipynb @@ -54,7 +54,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { @@ -80,7 +80,7 @@ }, "outputs": [], "source": [ - "!pip install imblearn" + "%pip install imblearn" ] }, { diff --git a/docs/reference/notebooks/fake_real_news_classification.ipynb b/docs/reference/notebooks/fake_real_news_classification.ipynb index d5a77ba8d0..1c28af44b3 100644 --- a/docs/reference/notebooks/fake_real_news_classification.ipynb +++ b/docs/reference/notebooks/fake_real_news_classification.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/hotel_text_regression.ipynb b/docs/reference/notebooks/hotel_text_regression.ipynb index 5365e2e8fd..aeb531ca4d 100644 --- a/docs/reference/notebooks/hotel_text_regression.ipynb +++ b/docs/reference/notebooks/hotel_text_regression.ipynb @@ -47,7 +47,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb b/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb index 60b1b3c056..c753958eaa 100644 --- a/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb +++ b/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb @@ -48,7 +48,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/insurance_prediction_lgbm.ipynb b/docs/reference/notebooks/insurance_prediction_lgbm.ipynb index 21539af18b..6143d82d91 100644 --- 
a/docs/reference/notebooks/insurance_prediction_lgbm.ipynb +++ b/docs/reference/notebooks/insurance_prediction_lgbm.ipynb @@ -50,7 +50,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { @@ -68,7 +68,7 @@ "execution_count": null, "outputs": [], "source": [ - "!pip install lightgbm" + "%pip install lightgbm" ], "metadata": { "collapsed": false, diff --git a/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb b/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb index 6829b7b932..64a7a59ac1 100644 --- a/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb +++ b/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb b/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb index a6b5c5460f..419b905837 100644 --- a/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb +++ b/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb @@ -50,7 +50,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { @@ -72,7 +72,7 @@ }, "outputs": [], "source": [ - "!pip install nltk" + "%pip install nltk" ] }, { diff --git a/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb b/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb index c78b1585c0..48a0a7abc6 100644 --- a/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb +++ b/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb @@ -38,9 +38,11 @@ } }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, + "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard 
--upgrade" ], "metadata": { "collapsed": false diff --git a/docs/reference/notebooks/newspaper_classification_pytorch.ipynb b/docs/reference/notebooks/newspaper_classification_pytorch.ipynb index 29b4178f3b..98eafdc294 100644 --- a/docs/reference/notebooks/newspaper_classification_pytorch.ipynb +++ b/docs/reference/notebooks/newspaper_classification_pytorch.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/titanic.ipynb b/docs/reference/notebooks/titanic.ipynb index a03b4ea069..7c30445d90 100644 --- a/docs/reference/notebooks/titanic.ipynb +++ b/docs/reference/notebooks/titanic.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "! pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb b/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb index 19b33f2902..3af7495ae3 100644 --- a/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb +++ b/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/twitter_sentiment_analysis_roberta.ipynb b/docs/reference/notebooks/twitter_sentiment_analysis_roberta.ipynb index a1d4e70a23..a2b2ebee77 100644 --- a/docs/reference/notebooks/twitter_sentiment_analysis_roberta.ipynb +++ b/docs/reference/notebooks/twitter_sentiment_analysis_roberta.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "! 
pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, { diff --git a/docs/reference/notebooks/wage_classification.ipynb b/docs/reference/notebooks/wage_classification.ipynb index 6c74a3e34e..7ebbc9823e 100644 --- a/docs/reference/notebooks/wage_classification.ipynb +++ b/docs/reference/notebooks/wage_classification.ipynb @@ -47,7 +47,7 @@ }, "outputs": [], "source": [ - "!pip install giskard --upgrade" + "%pip install giskard --upgrade" ] }, {