26 changes: 12 additions & 14 deletions giskard/core/model_validation.py
@@ -39,41 +39,39 @@ def _track_validation_error(err, model, dataset):
 
 
 def _do_validate_model(model: BaseModel, validate_ds: Optional[Dataset] = None):
-    model_type = model.meta.model_type
+    model_type = model.model_type
 
     if isinstance(model, WrapperModel) and model.data_preprocessing_function is not None:
         validate_data_preprocessing_function(model.data_preprocessing_function)
 
     if isinstance(model, WrapperModel) and model.model_postprocessing_function is not None:
         validate_model_postprocessing_function(model.model_postprocessing_function)
 
-    validate_classification_labels(model.meta.classification_labels, model_type)
+    validate_classification_labels(model.classification_labels, model_type)
 
     if model.is_classification:
-        validate_classification_threshold_label(model.meta.classification_labels, model.meta.classification_threshold)
+        validate_classification_threshold_label(model.classification_labels, model.classification_threshold)
 
-    assert model.meta.feature_names is None or isinstance(
-        model.meta.feature_names, list
+    assert model.feature_names is None or isinstance(
+        model.feature_names, list
     ), "Invalid feature_names parameter. Please provide the feature names as a list."
 
     if validate_ds is not None:
         validate_is_pandasdataframe(validate_ds.df)
-        validate_features(feature_names=model.meta.feature_names, validate_df=validate_ds.df)
+        validate_features(feature_names=model.feature_names, validate_df=validate_ds.df)
 
         if model.is_regression:
             validate_model_execution(model, validate_ds)
         elif model.is_text_generation:
             validate_model_execution(model, validate_ds, False)
         elif model.is_classification and validate_ds.target is not None:
             target_values = validate_ds.df[validate_ds.target].unique()
-            validate_label_with_target(
-                model.meta.name, model.meta.classification_labels, target_values, validate_ds.target
-            )
+            validate_label_with_target(model.name, model.classification_labels, target_values, validate_ds.target)
             validate_model_execution(model, validate_ds)
         else:  # Classification with target = None
             validate_model_execution(model, validate_ds)
 
-        if model.meta.model_type == SupportedModelTypes.CLASSIFICATION and validate_ds.target is not None:
+        if model.model_type == SupportedModelTypes.CLASSIFICATION and validate_ds.target is not None:
             validate_order_classifcation_labels(model, validate_ds)
 
@@ -89,7 +87,7 @@ def validate_model_execution(model: BaseModel, dataset: Dataset, deterministic:
     try:
         prediction = model.predict(validation_ds)
     except Exception as e:
-        features = model.meta.feature_names if model.meta.feature_names is not None else validation_ds.df.columns
+        features = model.feature_names if model.feature_names is not None else validation_ds.df.columns
         number_of_features = len(features)
 
         # Some models (mostly sklearn) expect a 1-dimensional ndarray or pd.Series as input in the case they're
@@ -125,9 +123,9 @@ def validate_model_execution(model: BaseModel, dataset: Dataset, deterministic:
 
     if deterministic:
         validate_deterministic_model(model, validation_ds, prediction)
-    validate_prediction_output(validation_ds, model.meta.model_type, prediction.raw)
+    validate_prediction_output(validation_ds, model.model_type, prediction.raw)
     if model.is_classification:
-        validate_classification_prediction(model.meta.classification_labels, prediction.raw)
+        validate_classification_prediction(model.classification_labels, prediction.raw)
 
 
 @configured_validate_arguments
@@ -311,7 +309,7 @@ def validate_order_classifcation_labels(model, dataset):
     y_true = dataset.df[dataset.target]
     y_pred = model.predict(dataset).prediction
     balanced_accuracy = balanced_accuracy_score(y_true, y_pred)
-    num_classes = len(model.meta.classification_labels)
+    num_classes = len(model.classification_labels)
 
     if balanced_accuracy <= 1 / num_classes:
         warning(

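The pattern in every hunk of this PR is the same: direct attribute access on the model (model.name, model.feature_names, ...) replaces the indirection through model.meta. A minimal sketch of how such a refactor can stay backward compatible, assuming (this is not shown in the PR) that BaseModel keeps its meta object and exposes read-only properties that delegate to it:

# Sketch only: the real giskard BaseModel/ModelMeta have more fields and logic.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class ModelMeta:
    name: Optional[str] = None
    description: Optional[str] = None
    model_type: Optional[str] = None
    feature_names: Optional[List[str]] = None
    classification_labels: Optional[List[str]] = None
    classification_threshold: Optional[float] = None


class BaseModel:
    def __init__(self, meta: ModelMeta):
        self.meta = meta

    @property
    def name(self) -> Optional[str]:
        # Delegates to the wrapped metadata, so model.name == model.meta.name.
        return self.meta.name

    @property
    def feature_names(self) -> Optional[List[str]]:
        return self.meta.feature_names

    @property
    def classification_labels(self) -> Optional[List[str]]:
        return self.meta.classification_labels

    # description, model_type, and classification_threshold would delegate
    # in exactly the same way.

Under that assumption, old call sites reading model.meta.name keep working while the new spelling model.name becomes the public surface, which is exactly what the remaining hunks migrate to.
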
6 changes: 3 additions & 3 deletions giskard/llm/evaluators/base.py
@@ -72,8 +72,8 @@ def __init__(self, eval_prompt=None, llm_temperature=0.1, llm_client: LLMClient
 
     def _make_evaluate_prompt(self, model: BaseModel, input_vars, model_output, row_idx):
         return self.eval_prompt.format(
-            model_name=model.meta.name,
-            model_description=model.meta.description,
+            model_name=model.name,
+            model_description=model.description,
             input_vars=input_vars,
             model_output=model_output,
         )
@@ -90,7 +90,7 @@ def evaluate(self, model: BaseModel, dataset: Dataset):
         errored = []
         for row_index, input_vars, model_output in zip(
             dataset.df.index,
-            dataset.df.loc[:, model.meta.feature_names].to_dict("records"),
+            dataset.df.loc[:, model.feature_names].to_dict("records"),
             model_outputs,
         ):
             sample = {"input_vars": input_vars, "model_output": model_output}

4 changes: 2 additions & 2 deletions giskard/llm/evaluators/coherency.py
@@ -88,8 +88,8 @@ def evaluate(self, model: BaseModel, dataset_1: Dataset, dataset_2: Optional[Dat
 
     def _eval_pair(self, model: BaseModel, input_1, input_2, output_1, output_2):
         prompt = self.eval_prompt.format(
-            model_name=model.meta.name,
-            model_description=model.meta.description,
+            model_name=model.name,
+            model_description=model.description,
             input_1=input_1,
             input_2=input_2,
             output_1=output_1,

4 changes: 2 additions & 2 deletions giskard/llm/evaluators/requirements.py
@@ -49,8 +49,8 @@ def requirements(self, row_idx) -> str:
 
     def _make_evaluate_prompt(self, model: BaseModel, input_vars, model_output, row_idx):
         return self.eval_prompt.format(
-            model_name=model.meta.name,
-            model_description=model.meta.description,
+            model_name=model.name,
+            model_description=model.description,
             input_vars=input_vars,
             model_output=model_output,
             requirements=self.requirements(row_idx),

2 changes: 1 addition & 1 deletion giskard/llm/evaluators/string_matcher.py
@@ -58,7 +58,7 @@ def evaluate(self, model: BaseModel, dataset: Dataset, evaluator_configs: List[S
         failed = []
         failed_idx = []
         errored = []
-        model_inputs = dataset.df.loc[:, model.meta.feature_names].to_dict("records")
+        model_inputs = dataset.df.loc[:, model.feature_names].to_dict("records")
         model_outputs = model.predict(dataset).prediction
 
         for idx, inputs, outputs, config in zip(dataset.df.index, model_inputs, model_outputs, evaluator_configs):

6 changes: 3 additions & 3 deletions giskard/llm/generators/adversarial.py
@@ -48,9 +48,9 @@ def _make_dataset_name(self, model: BaseModel, num_samples):
     def _make_generate_input_prompt(self, model: BaseModel, num_inputs: int):
         input_prompt = self.prompt.format(
             issue_description=self.issue_description,
-            model_name=model.meta.name,
-            model_description=model.meta.description,
-            feature_names=", ".join(model.meta.feature_names),
+            model_name=model.name,
+            model_description=model.description,
+            feature_names=", ".join(model.feature_names),
             num_samples=num_inputs,
             requirement=self.requirement,
         )

10 changes: 5 additions & 5 deletions giskard/llm/generators/base.py
@@ -51,9 +51,9 @@ def __init__(
 class BaseDataGenerator(LLMGenerator):
     def _make_generate_input_prompt(self, model: BaseModel, num_samples: int):
         input_prompt = self.prompt.format(
-            model_name=model.meta.name,
-            model_description=model.meta.description,
-            feature_names=", ".join(model.meta.feature_names),
+            model_name=model.name,
+            model_description=model.description,
+            feature_names=", ".join(model.feature_names),
             num_samples=num_samples,
         )
         if self.languages:
@@ -72,7 +72,7 @@ def _make_generate_input_functions(self, model: BaseModel, num_samples: int):
                     "type": "array",
                     "items": {
                         "type": "object",
-                        "properties": {name: {"type": "string"} for name in model.meta.feature_names},
+                        "properties": {name: {"type": "string"} for name in model.feature_names},
                     },
                 }
             },
@@ -82,7 +82,7 @@ def _make_generate_input_functions(self, model: BaseModel, num_samples: int):
     ]
 
     def _make_dataset_name(self, model: BaseModel, num_samples):
-        return f"Synthetic Test Dataset for {model.meta.name}"
+        return f"Synthetic Test Dataset for {model.name}"
 
     def generate_dataset(self, model: BaseModel, num_samples: int = 10, column_types=None) -> Dataset:
         """Generates a test dataset for the model.

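For orientation, here is the value the _make_generate_input_functions hunk above would produce for a model with feature_names = ["question", "context"]. Only the fragment visible in the diff is confirmed; the surrounding function-calling envelope is not shown and is omitted here:

# Expansion of the dict comprehension from the hunk above, assuming
# feature_names = ["question", "context"]; the enclosing keys of the
# function definition are not part of the diff.
items_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "question": {"type": "string"},
            "context": {"type": "string"},
        },
    },
}
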
14 changes: 7 additions & 7 deletions giskard/llm/generators/sycophancy.py
@@ -40,9 +40,9 @@ class SycophancyDataGenerator(LLMGenerator):
 
     def _make_generate_input_prompt(self, model: BaseModel, num_samples):
         input_prompt = self.prompt.format(
-            model_name=model.meta.name,
-            model_description=model.meta.description,
-            feature_names=", ".join(model.meta.feature_names),
+            model_name=model.name,
+            model_description=model.description,
+            feature_names=", ".join(model.feature_names),
             num_samples=num_samples,
         )
         if self.languages:
@@ -64,11 +64,11 @@ def _make_generate_input_functions(self, model: BaseModel):
                 "properties": {
                     "input_version_1": {
                         "type": "object",
-                        "properties": {name: {"type": "string"} for name in model.meta.feature_names},
+                        "properties": {name: {"type": "string"} for name in model.feature_names},
                     },
                     "input_version_2": {
                         "type": "object",
-                        "properties": {name: {"type": "string"} for name in model.meta.feature_names},
+                        "properties": {name: {"type": "string"} for name in model.feature_names},
                     },
                 },
             },
@@ -100,12 +100,12 @@ def generate_dataset(self, model: BaseModel, num_samples=10, column_types=None):
 
         dataset_1 = Dataset(
             pd.DataFrame([p["input_version_1"] for p in input_pairs]),
-            name=f"Sycophancy examples for {model.meta.name} (set 1)",
+            name=f"Sycophancy examples for {model.name} (set 1)",
             column_types=column_types,
         )
         dataset_2 = Dataset(
             pd.DataFrame([p["input_version_2"] for p in input_pairs]),
-            name=f"Sycophancy examples for {model.meta.name} (set 2)",
+            name=f"Sycophancy examples for {model.name} (set 2)",
             column_types=column_types,
         )
 
4 changes: 2 additions & 2 deletions giskard/llm/testcase.py
@@ -56,8 +56,8 @@ def __init__(self, issue_description: str, llm_temperature=0.1, llm_client: LLMC
     def _make_generate_requirements_prompt(self, model: BaseModel, num_requirements: int):
         return GENERATE_REQUIREMENTS_PROMPT.format(
             issue_description=self.issue_description,
-            model_name=model.meta.name,
-            model_description=model.meta.description,
+            model_name=model.name,
+            model_description=model.description,
             count=num_requirements,
         )
 
14 changes: 7 additions & 7 deletions giskard/ml_worker/websocket/listener.py
@@ -262,17 +262,17 @@ def on_ml_worker_stop_worker(*args, **kwargs) -> None:
 
 def run_classification_mode(model, dataset, prediction_results):
     results = prediction_results.all_predictions
-    labels = {k: v for k, v in enumerate(model.meta.classification_labels)}
+    labels = {k: v for k, v in enumerate(model.classification_labels)}
     label_serie = dataset.df[dataset.target] if dataset.target else None
-    if len(model.meta.classification_labels) > 2 or model.meta.classification_threshold is None:
+    if len(model.classification_labels) > 2 or model.classification_threshold is None:
         preds_serie = prediction_results.all_predictions.idxmax(axis="columns")
         sorted_predictions = np.sort(prediction_results.all_predictions.values)
         abs_diff = pd.Series(
             sorted_predictions[:, -1] - sorted_predictions[:, -2],
             name="absDiff",
         )
     else:
-        diff = prediction_results.all_predictions.iloc[:, 1] - model.meta.classification_threshold
+        diff = prediction_results.all_predictions.iloc[:, 1] - model.classification_threshold
         preds_serie = (diff >= 0).astype(int).map(labels).rename("predictions")
         abs_diff = pd.Series(diff.abs(), name="absDiff")
     calculated = pd.concat([preds_serie, label_serie, abs_diff], axis=1)
@@ -430,12 +430,12 @@ def explain_text_ws(
         raise ValueError(f"Column {text_column} is not of type text")
     text_document = params.columns[text_column]
     input_df = pd.DataFrame({k: [v] for k, v in params.columns.items()})
-    if model.meta.feature_names:
-        input_df = input_df[model.meta.feature_names]
+    if model.feature_names:
+        input_df = input_df[model.feature_names]
     (list_words, list_weights) = explain_text(model, input_df, text_column, text_document)
     # Classification model contains classification labels, but regression model does not
-    classification_labels = model.meta.classification_labels if model.meta.classification_labels else ["WEIGHTS"]
-    list_weights = list_weights if model.meta.classification_labels else [list_weights]
+    classification_labels = model.classification_labels if model.classification_labels else ["WEIGHTS"]
+    list_weights = list_weights if model.classification_labels else [list_weights]
     map_features_weight = dict(zip(classification_labels, list_weights))
     return websocket.ExplainText(
         weights={
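A note on the run_classification_mode hunk above: for binary models with a threshold, absDiff measures how far the positive-class probability sits from the decision boundary, while for multiclass models (or when no threshold is set) it is the margin between the top two predicted probabilities. A worked sketch of both branches, using illustrative numbers that are not from the PR:

import numpy as np
import pandas as pd

# Illustrative values only: a binary classifier with labels
# ["negative", "positive"] and decision threshold 0.5.
labels = {0: "negative", 1: "positive"}
classification_threshold = 0.5
all_predictions = pd.DataFrame({"negative": [0.8, 0.3], "positive": [0.2, 0.7]})

# Binary branch: distance of the positive-class probability from the threshold.
diff = all_predictions.iloc[:, 1] - classification_threshold
preds_serie = (diff >= 0).astype(int).map(labels).rename("predictions")
abs_diff = pd.Series(diff.abs(), name="absDiff")
print(list(preds_serie))  # ['negative', 'positive']
print(list(abs_diff))     # [0.3, 0.2] up to float rounding

# Multiclass branch: gap between the two highest probabilities in each row.
sorted_predictions = np.sort(all_predictions.values)
gap = sorted_predictions[:, -1] - sorted_predictions[:, -2]
print(list(gap))          # approximately [0.6, 0.4]
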