Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 31 additions & 12 deletions .github/workflows/build-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,29 +48,37 @@ defaults:
shell: bash
jobs:
build-python:
name: "Python ${{ matrix.python-version }}${{ matrix.pydantic_v2 && ' (Pydantic V2)' || ''}} on ${{ matrix.os }}${{matrix.experimental && ' (Non failing)' || '' }}"
name: "Python ${{ matrix.python-version }}${{ matrix.pandas_v1 && ' (Pandas V1)' || ''}}${{ matrix.pydantic_v1 && ' (Pydantic V1)' || ''}} on ${{ matrix.os }}${{matrix.experimental && ' (Non failing)' || '' }}"
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false # Do not stop when any job fails
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.9", "3.10", "3.11" ]
os: [ubuntu-latest]
pydantic_v2: [false]
pydantic_v1: [false]
pandas_v1: [false]
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
include:
- python-version: "3.10"
os: windows-2019
pydantic_v2: false
pydantic_v1: false
pandas_v1: false
- python-version: "3.10"
os: windows-2022
pydantic_v2: false
pydantic_v1: false
pandas_v1: false
- python-version: "3.10"
os: macos-latest
pydantic_v2: false
pydantic_v1: false
pandas_v1: false
- python-version: "3.10"
os: ubuntu-latest
pydantic_v2: true

pydantic_v1: true
pandas_v1: false
- python-version: "3.10"
os: ubuntu-latest
pydantic_v1: false
pandas_v1: true
continue-on-error: false # https://ncorti.com/blog/howto-github-actions-build-matrix
steps:
- name: Checkout code
Expand Down Expand Up @@ -101,16 +109,27 @@ jobs:
- name: Lint code
run: pdm run lint

- name: Install pydantic v2
if: ${{ matrix.pydantic_v2 }}
- name: Install pydantic v1
if: ${{ matrix.pydantic_v1 }}
run: |
pdm run pip uninstall pydantic pydantic_core -y
pdm run pip install "pydantic>=2<3"
pdm run pip install "pydantic>=1,<2"

- name: Check Pydantic installed version
run: |
pdm run pip freeze | grep '^pydantic'
pdm run pip freeze | grep -q '^pydantic==${{ matrix.pydantic_v2 && '2' || '1' }}\.'
pdm run pip freeze | grep -q '^pydantic==${{ matrix.pydantic_v1 && '1' || '2' }}\.'

- name: Install pandas v1
if: ${{ matrix.pandas_v1 }}
run: |
pdm run pip uninstall pandas -y
pdm run pip install "pandas<2"

- name: Check Pandas installed version
run: |
pdm run pip freeze | grep '^pandas'
pdm run pip freeze | grep -q '^pandas==${{ matrix.pandas_v1 && '1' || '2' }}\.'

- name: Test code
env:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ You can install the latest version of Giskard from PyPi using pip :
```sh
pip install "giskard[server]>=2.0.0b" -U
```
We officially support Python 3.8, 3.9, 3.10 and 3.11.
We officially support Python 3.9, 3.10 and 3.11.
## Try in Colab 📙
[Open Colab notebook](https://colab.research.google.com/github/giskard-ai/giskard/blob/main/docs/getting-started/quickstart.ipynb)

Expand Down
44 changes: 22 additions & 22 deletions giskard/testing/tests/drift.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,19 @@ def _calculate_drift_psi(actual_series, reference_series, max_categories):
expected_distribution = expected_frequencies / len(reference_series)
actual_distribution = actual_frequencies / len(actual_series)
total_psi = 0
output_data = pd.DataFrame(columns=["Modality", "Reference_distribution", "Actual_distribution", "Psi"])
for category in range(len(all_modalities)):
output_data = []
for category, modality in enumerate(all_modalities):
modality_psi = _calculate_psi(category, actual_distribution, expected_distribution)

total_psi += modality_psi
row = {
"Modality": all_modalities[category],
"Reference_distribution": expected_distribution[category],
"Actual_distribution": expected_distribution[category],
"Psi": modality_psi,
}

output_data = output_data.append(pd.Series(row), ignore_index=True)
return total_psi, output_data
output_data.append(
{
"Modality": modality,
"Reference_distribution": expected_distribution[category],
"Actual_distribution": expected_distribution[category],
"Psi": modality_psi,
}
)
return total_psi, pd.DataFrame(output_data)


def _calculate_ks(actual_series, reference_series) -> Ks_2sampResult:
Expand Down Expand Up @@ -131,28 +130,29 @@ def _calculate_chi_square(actual_series, reference_series, max_categories):
# so that reference and actual has the same size
# See https://github.com/scipy/scipy/blob/v1.8.0/scipy/stats/_stats_py.py#L6787
k_norm = actual_series.shape[0] / reference_series.shape[0]
output_data = pd.DataFrame(columns=["Modality", "Reference_frequencies", "Actual_frequencies", "Chi_square"])
for i in range(len(all_modalities)):
output_data = []
for i, modality in enumerate(all_modalities):
chi_square_value = (actual_frequencies[i] - expected_frequencies[i] * k_norm) ** 2 / (
expected_frequencies[i] * k_norm
)
chi_square += chi_square_value

row = {
"Modality": all_modalities[i],
"Reference_frequencies": expected_frequencies[i],
"Actual_frequencies": actual_frequencies[i],
"Chi_square": chi_square_value,
}
output_data.append(
{
"Modality": modality,
"Reference_frequencies": expected_frequencies[i],
"Actual_frequencies": actual_frequencies[i],
"Chi_square": chi_square_value,
}
)

output_data = output_data.append(pd.Series(row), ignore_index=True)
# if reference_series and actual_series has only one modality it turns nan (len(all_modalities)=1)
if len(all_modalities) > 1:
chi_cdf = chi2.cdf(chi_square, len(all_modalities) - 1)
p_value = 1 - chi_cdf if chi_cdf != 0 else 0
else:
p_value = 0
return chi_square, p_value, output_data
return chi_square, p_value, pd.DataFrame(output_data)


def _validate_feature_type(gsk_dataset, column_name, feature_type):
Expand Down
Loading