Merged
2 changes: 1 addition & 1 deletion giskard/core/model_validation.py
@@ -147,7 +147,7 @@ def validate_model_loading_and_saving(model: BaseModel):
with tempfile.TemporaryDirectory(prefix="giskard-model-") as f:
model.save(f)

- with open(f + "/giskard-model-meta.yaml") as yaml_f:
+ with open(f + "/giskard-model-meta.yaml", encoding="utf-8") as yaml_f:
saved_meta = yaml.load(yaml_f, Loader=yaml.Loader)

meta = ModelMeta(
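Every change in this PR follows the same pattern: without an explicit encoding argument, open() in text mode falls back to the locale's preferred encoding (often cp1252 on Windows rather than UTF-8), so metadata containing non-ASCII characters may fail to round-trip across platforms. A minimal standalone sketch of the failure mode and the fix (illustration only, not code from the repository):

import locale
import tempfile
from pathlib import Path

# The implicit default comes from the locale (often cp1252 on Windows),
# so it is not guaranteed to be UTF-8.
print(locale.getpreferredencoding(False))

meta_path = Path(tempfile.mkdtemp()) / "giskard-model-meta.yaml"

# Writing and reading with an explicit encoding round-trips non-ASCII
# metadata identically on every platform.
with open(meta_path, "w", encoding="utf-8") as f:
    f.write("name: modèle-crédit\n")

with open(meta_path, encoding="utf-8") as f:
    print(f.read())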
4 changes: 2 additions & 2 deletions giskard/core/savable.py
@@ -54,7 +54,7 @@ def _get_meta_endpoint(cls, uuid: str, project_key: str) -> str:
return posixpath.join("project", project_key, cls._get_name(), uuid)

def _save_meta_locally(self, local_dir):
- with open(Path(local_dir) / "meta.yaml", "w") as f:
+ with open(Path(local_dir) / "meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(self.meta, f)


@@ -70,7 +70,7 @@ def _load_meta_locally(cls, local_dir, uuid: str) -> Optional[SMT]:
if meta is not None:
return meta

- with open(local_dir / "meta.yaml", "r") as f:
+ with open(local_dir / "meta.yaml", "r", encoding="utf-8") as f:
return yaml.load(f, Loader=yaml.Loader)

@classmethod
10 changes: 5 additions & 5 deletions giskard/core/suite.py
@@ -162,8 +162,8 @@ def to_json(self, filename=None):
"metric_value": suite_result.result.metric,
}
if filename is not None:
- with open(filename, "w") as json_file:
- json.dump(results, json_file, indent=4)
+ with open(filename, "w", encoding="utf-8") as json_file:
+ json.dump(results, json_file, indent=4, ensure_ascii=False)
else:
return json.dumps(results, indent=4)

@@ -628,8 +628,8 @@ def save(self, folder: str):

json_content = self._to_json(folder_path, saved_uuid_status)

- with open(folder_path / "suite.json", "w") as f:
- json.dump(json_content, f)
+ with open(folder_path / "suite.json", "w", encoding="utf-8") as f:
+ json.dump(json_content, f, ensure_ascii=False)

analytics.track("lib:test_suite:saved")

@@ -843,7 +843,7 @@ def _contains_test(self, test: TestFunctionMeta):
def load(cls, folder: str) -> "Suite":
folder_path = Path(folder)

- with open(folder_path / "suite.json", "r") as f:
+ with open(folder_path / "suite.json", "r", encoding="utf-8") as f:
suite_json = json.load(f)

suite = Suite(name=suite_json.get("name", "Unnamed test suite"))
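The to_json changes pair the explicit encoding with ensure_ascii=False, which makes json.dump write non-ASCII characters verbatim instead of as \uXXXX escapes; this keeps the saved results human-readable and is safe precisely because the file is now guaranteed to be UTF-8. A standalone illustration (not repository code):

import json

results = {"robustness": {"major": ["Le modèle répond différemment selon le prénom."]}}

# Default behaviour escapes every non-ASCII character as \uXXXX:
print(json.dumps(results, indent=4))

# ensure_ascii=False keeps the characters verbatim; when dumping to a file,
# this relies on the file being opened with a known encoding such as UTF-8.
print(json.dumps(results, indent=4, ensure_ascii=False))

Note that the in-memory path, return json.dumps(results, indent=4), is left unchanged and keeps the default escaping.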
4 changes: 2 additions & 2 deletions giskard/datasets/base/__init__.py
@@ -525,7 +525,7 @@ def cast_column_to_dtypes(df, column_dtypes):
@classmethod
def load(cls, local_path: str):
# load metadata
- with open(Path(local_path) / "giskard-dataset-meta.yaml", "r") as meta_f:
+ with open(Path(local_path) / "giskard-dataset-meta.yaml", "r", encoding="utf-8") as meta_f:
meta = yaml.safe_load(meta_f)

# load data
@@ -560,7 +560,7 @@ def save(self, local_path: str):
f.write(compressed_bytes)
original_size_bytes, compressed_size_bytes = len(uncompressed_bytes), len(compressed_bytes)

- with open(Path(local_path) / "giskard-dataset-meta.yaml", "w") as meta_f:
+ with open(Path(local_path) / "giskard-dataset-meta.yaml", "w", encoding="utf-8") as meta_f:
yaml.dump(
{
"id": str(self.id),
4 changes: 2 additions & 2 deletions giskard/models/base/wrapper.py
@@ -230,7 +230,7 @@ def save_model_postprocessing_function(self, local_path: Union[str, Path], *_arg
cloudpickle.dump(self.model_postprocessing_function, f, protocol=pickle.DEFAULT_PROTOCOL)

def save_wrapper_meta(self, local_path, *_args, **_kwargs):
- with open(Path(local_path) / "giskard-model-wrapper-meta.yaml", "w") as f:
+ with open(Path(local_path) / "giskard-model-wrapper-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"batch_size": self.batch_size,
@@ -313,7 +313,7 @@ def load_model_postprocessing_function(cls, local_path: Union[str, Path], *_args
def load_wrapper_meta(cls, local_dir, *args, **kwargs):
wrapper_meta_file = Path(local_dir) / "giskard-model-wrapper-meta.yaml"
if wrapper_meta_file.exists():
- with open(wrapper_meta_file) as f:
+ with open(wrapper_meta_file, encoding="utf-8") as f:
wrapper_meta = yaml.load(f, Loader=yaml.Loader)
wrapper_meta["batch_size"] = int(wrapper_meta["batch_size"]) if wrapper_meta["batch_size"] else None
return wrapper_meta
5 changes: 3 additions & 2 deletions giskard/models/huggingface.py
@@ -92,6 +92,7 @@ class explicitly using :class:`giskard.models.huggingface.HuggingFaceModel`.
the `model_postprocessing_function` argument. This function should take the
raw output of your model and return a numpy array of probabilities.
"""

from typing import Any, Callable, Iterable, Optional, Tuple, Union

import logging
@@ -199,7 +200,7 @@ def __init__(
def load_model(cls, local_path, model_py_ver: Optional[Tuple[str, str, str]] = None, *args, **kwargs):
huggingface_meta_file = Path(local_path) / "giskard-model-huggingface-meta.yaml"
if huggingface_meta_file.exists():
- with open(huggingface_meta_file) as f:
+ with open(huggingface_meta_file, encoding="utf-8") as f:
huggingface_meta = yaml.load(f, Loader=yaml.Loader)

if huggingface_meta["pipeline_task"]:
@@ -208,7 +209,7 @@ def load_model(cls, local_path, model_py_ver: Optional[Tuple[str, str, str]] = N
return huggingface_meta["huggingface_module"].from_pretrained(local_path)

def save_huggingface_meta(self, local_path, *args, **kwargs):
- with open(Path(local_path) / "giskard-model-huggingface-meta.yaml", "w") as f:
+ with open(Path(local_path) / "giskard-model-huggingface-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"huggingface_module": self.huggingface_module,
4 changes: 2 additions & 2 deletions giskard/models/pytorch.py
@@ -200,7 +200,7 @@ def _convert_to_numpy(self, raw_predictions):
return super()._convert_to_numpy(raw_predictions)

def save_pytorch_meta(self, local_path, *_args, **_kwargs):
- with open(Path(local_path) / "giskard-model-pytorch-meta.yaml", "w") as f:
+ with open(Path(local_path) / "giskard-model-pytorch-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"device": self.device,
@@ -224,7 +224,7 @@ def load(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, *a
def load_pytorch_meta(cls, local_dir):
pytorch_meta_file = Path(local_dir) / "giskard-model-pytorch-meta.yaml"
if pytorch_meta_file.exists():
- with open(pytorch_meta_file) as f:
+ with open(pytorch_meta_file, encoding="utf-8") as f:
pytorch_meta = yaml.load(f, Loader=yaml.Loader)
pytorch_meta["device"] = pytorch_meta.get("device")
pytorch_meta["torch_dtype"] = pytorch_meta.get("torch_dtype")
8 changes: 4 additions & 4 deletions giskard/rag/report.py
@@ -193,11 +193,11 @@ def load(
The embedding model to use inside the knowledge base. If not provided, the default model will be used.
"""
path = Path(folder_path)
- knowledge_base_meta = json.load(open(path / "knowledge_base_meta.json", "r"))
+ knowledge_base_meta = json.load(open(path / "knowledge_base_meta.json", "r", encoding="utf-8"))
knowledge_base_data = pd.read_json(path / "knowledge_base.jsonl", orient="records", lines=True)
testset = QATestset.load(path / "testset.jsonl")

- answers = json.load(open(path / "agent_answer.json", "r"))
+ answers = json.load(open(path / "agent_answer.json", "r", encoding="utf-8"))
model_outputs = [AgentAnswer(**answer) for answer in answers]

topics = {int(k): topic for k, topic in knowledge_base_meta.pop("topics", None).items()}
@@ -219,9 +219,9 @@

metrics_results = {}
if (path / "metrics_results.json").exists():
- metrics_results = json.load(open(path / "metrics_results.json", "r"))
+ metrics_results = json.load(open(path / "metrics_results.json", "r", encoding="utf-8"))

- report_details = json.load(open(path / "report_details.json", "r"))
+ report_details = json.load(open(path / "report_details.json", "r", encoding="utf-8"))
testset._dataframe.index = testset._dataframe.index.astype(str)

report = cls(testset, model_outputs, metrics_results, knowledge_base)
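A side note on these loaders: json.load(open(...)) never closes the file handle explicitly, and this PR only adds the encoding argument to the existing pattern. A context-managed equivalent would look roughly like the sketch below (a hypothetical refactor with a made-up _load_json helper, not part of this change):

import json
from pathlib import Path

def _load_json(path: Path) -> dict:
    # Path.read_text accepts the same encoding argument and closes the file.
    return json.loads(path.read_text(encoding="utf-8"))

# e.g. knowledge_base_meta = _load_json(path / "knowledge_base_meta.json")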
2 changes: 1 addition & 1 deletion giskard/registry/giskard_test.py
@@ -73,7 +73,7 @@ def _load_meta_locally(cls, local_dir, uuid: str) -> Optional[TestFunctionMeta]:
if meta is not None:
return meta

- with open(local_dir / "meta.yaml", "r") as f:
+ with open(local_dir / "meta.yaml", "r", encoding="utf-8") as f:
return yaml.load(f, Loader=yaml.Loader)

@classmethod
12 changes: 6 additions & 6 deletions giskard/scanner/report.py
@@ -90,8 +90,8 @@ def to_json(self, filename=None):
results[issue.detector_name][issue.level] = []
results[issue.detector_name][issue.level].append(issue.description)
if filename is not None:
- with open(filename, "w") as json_file:
- json.dump(results, json_file, indent=4)
+ with open(filename, "w", encoding="utf-8") as json_file:
+ json.dump(results, json_file, indent=4, ensure_ascii=False)
else:
return json.dumps(results, indent=4)

@@ -115,7 +115,7 @@ def to_html(self, filename=None, embed=False):
html = widget.render_html(embed=embed)

if filename is not None:
- with open(filename, "w") as f:
+ with open(filename, "w", encoding="utf-8") as f:
f.write(html)
return

@@ -139,7 +139,7 @@ def to_markdown(self, filename=None, template="summary"):
markdown = widget.render_markdown(template=template)

if filename is not None:
- with open(filename, "w") as f:
+ with open(filename, "w", encoding="utf-8") as f:
f.write(markdown)
return

@@ -349,7 +349,7 @@ def to_avid(self, filename=None):
]

if filename is not None:
- with open(filename, "w") as f, warnings.catch_warnings():
+ with open(filename, "w", encoding="utf-8") as f, warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning) # we need to support both pydantic 1 & 2
f.writelines(r.json() + "\n" for r in reports)
return
@@ -373,7 +373,7 @@ def generate_rails(self, filename=None, colang_version="1.0"):
_rails = generate_rails_from_scan_report(self, colang_version=colang_version)

if filename:
- with open(filename, "a") as f:
+ with open(filename, "a", encoding="utf-8") as f:
f.write(_rails)
return

2 changes: 1 addition & 1 deletion giskard/scanner/robustness/text_transformations.py
@@ -341,7 +341,7 @@ class TextNationalityTransformation(TextLanguageBasedTransformation):
name = "Switch countries from high- to low-income and vice versa"

def _load_dictionaries(self):
- with Path(__file__).parent.joinpath("nationalities.json").open("r") as f:
+ with Path(__file__).parent.joinpath("nationalities.json").open("r", encoding="utf-8") as f:
nationalities_dict = json.load(f)
self._lang_dictionary = {"en": nationalities_dict["en"], "fr": nationalities_dict["fr"]}

8 changes: 5 additions & 3 deletions giskard/visualization/widget.py
@@ -98,9 +98,11 @@ def render_html(self, template="full", embed=False) -> str:
escaped = escape(html)
uid = id(self)

- with Path(__file__).parent.joinpath("templates", "scan_report", "html", "static", "external.js").open(
- "r"
- ) as f:
+ with (
+ Path(__file__)
+ .parent.joinpath("templates", "scan_report", "html", "static", "external.js")
+ .open("r", encoding="utf-8") as f
+ ):
js_lib = f.read()

html = f"""<iframe id="scan-{uid}" srcdoc="{escaped}" style="width: 100%; border: none;" class="gsk-scan"></iframe>
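Besides adding the encoding, the rewritten block switches to a parenthesized with statement to break the long call chain across lines; that form is only officially supported from Python 3.10. On older interpreters an equivalent would be to build the path first, roughly as in this sketch (assuming the same template layout):

from pathlib import Path

js_path = Path(__file__).parent / "templates" / "scan_report" / "html" / "static" / "external.js"
with js_path.open("r", encoding="utf-8") as f:
    js_lib = f.read()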
4 changes: 2 additions & 2 deletions tests/fixtures/enron_multilabel_classification.py
@@ -56,7 +56,7 @@

# get_labels returns a dictionary representation of these labels.
def get_labels(filename):
- with open(filename + ".cats") as f:
+ with open(filename + ".cats", encoding="utf-8") as f:
labels = defaultdict(dict)
line = f.readline()
while line:
@@ -99,7 +99,7 @@ def enron_raw_data_full() -> pd.DataFrame:

# Features are metadata from the email object
filename = email_file + ".txt"
- with open(filename) as f:
+ with open(filename, encoding="utf-8") as f:
message = email.message_from_string(f.read())

values_to_add["Subject"] = str(message["Subject"])
2 changes: 1 addition & 1 deletion tests/integrations/test_avid.py
@@ -76,7 +76,7 @@ def test_scan_report_can_be_exported_to_avid():
dest_path = Path(tmpdir).joinpath("test_report.avid")
report.to_avid(dest_path)

- with dest_path.open("r") as f:
+ with dest_path.open("r", encoding="utf-8") as f:
avid_reports_read = [json.loads(line) for line in f.readlines()]

assert len(avid_reports_read) == len(avid_reports)
26 changes: 22 additions & 4 deletions tests/integrations/test_nemoguardrails.py
@@ -1,22 +1,39 @@
import json
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

import pandas as pd
import pytest
from nemoguardrails.colang import parse_colang_file

from giskard.llm.client.base import ChatMessage
from giskard.scanner.issues import Issue, Robustness
from giskard.scanner.report import ScanReport


def _generate_rails(report: ScanReport, filename=None, colang_version="1.0"):
if filename:
with tempfile.TemporaryDirectory() as tmpdir:
dest = Path(tmpdir).joinpath("rails.co")
report.generate_rails(filename=dest, colang_version=colang_version)
assert dest.exists()
assert dest.is_file()
rails = dest.read_text(encoding="utf-8")
else:
rails = report.generate_rails(colang_version=colang_version)
return rails


@pytest.mark.parametrize("filename", [(None), ("rails.co")])
@patch("giskard.integrations.nemoguardrails.get_default_client")
- def test_generate_colang_v1_rails_from_scan(get_default_client_mock):
+ def test_generate_colang_v1_rails_from_scan(get_default_client_mock, filename):
report = make_test_report()

llm_client = get_default_client_mock()
llm_client.complete.side_effect = make_llm_answers()

- rails = report.generate_rails()
+ rails = _generate_rails(report, filename=filename, colang_version="1.0")

# Check that the file is correctly formatted
parsed = parse_colang_file("rails.co", rails, version="1.0")
@@ -27,14 +44,15 @@ def test_generate_colang_v1_rails_from_scan(get_default_client_mock):
assert parsed["flows"][1]["id"] == "ask help on illegal activities"


@pytest.mark.parametrize("filename", [(None), ("rails.co")])
@patch("giskard.integrations.nemoguardrails.get_default_client")
- def test_generate_colang_v2_rails_from_scan(get_default_client_mock):
+ def test_generate_colang_v2_rails_from_scan(get_default_client_mock, filename):
report = make_test_report()

llm_client = get_default_client_mock()
llm_client.complete.side_effect = make_llm_answers()

- rails = report.generate_rails(colang_version="2.x")
+ rails = _generate_rails(report, filename=filename, colang_version="2.x")

# Check that the file is correctly formatted
parsed = parse_colang_file("rails.co", rails, version="2.x")
2 changes: 1 addition & 1 deletion tests/registry/module_utils.py
@@ -23,7 +23,7 @@ class PythonModule:

def _write_file(dir: Path, file: Union[str, Path], content: str):
os.makedirs(os.path.dirname(dir / file), exist_ok=True)
- with open(dir / file, "w") as f:
+ with open(dir / file, "w", encoding="utf-8") as f:
f.write(content)


2 changes: 1 addition & 1 deletion tests/scan/test_scan_report.py
@@ -50,7 +50,7 @@ def test_scan_report_exports_to_markdown():

assert dest.exists()
assert dest.is_file()
- assert dest.read_text() == markdown
+ assert dest.read_text(encoding="utf-8") == markdown


def test_scan_report_to_json():
34 changes: 34 additions & 0 deletions tests/scan/test_scanner.py
@@ -1,6 +1,8 @@
import re
import sys
import tempfile
import warnings
from pathlib import Path
from unittest import mock

import numpy as np
@@ -279,3 +281,35 @@ def test_min_slice_size(titanic_model, titanic_dataset):
detector = SpuriousCorrelationDetector(min_slice_size=2000)
issues = detector.run(titanic_model, titanic_dataset, features=titanic_model.feature_names)
assert len(issues) == 0


@pytest.mark.parametrize(
"filename",
[(None), ("scan_test_suite_results.json")],
)
@pytest.mark.slow
def test_export_scan_test_suite_results_to_json(filename, request):
DATASET_NAME = "diabetes_dataset_with_target"
MODEL_NAME = "linear_regression_diabetes"

dataset = request.getfixturevalue(DATASET_NAME)
model = request.getfixturevalue(MODEL_NAME)

scanner = Scanner()
scan_results = scanner.analyze(model, dataset)
test_suite_results = scan_results.generate_test_suite().run()

if filename:
with tempfile.TemporaryDirectory() as tmpdir:
dest = Path(tmpdir).joinpath(filename)
test_suite_results.to_json(dest)
assert dest.exists()
assert dest.is_file()
test_results_json = dest.read_text(encoding="utf-8")

else:
test_results_json = test_suite_results.to_json()
assert test_results_json is not None

assert test_results_json.startswith("{")
assert test_results_json.strip().endswith("}")