Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
ba85f09
introduced debug_description in test decorator and on the backend side
rabah-khalek Aug 2, 2023
793a570
improved to_json
rabah-khalek Aug 3, 2023
1dd4a40
updated changelog
rabah-khalek Aug 3, 2023
6a1129c
Merge branch 'main' into GSK-1476-debug-description
rabah-khalek Aug 4, 2023
77e3132
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 11, 2023
aeca408
Fix persistence to jakarta
Inokinoki Sep 11, 2023
c399322
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 11, 2023
d164cf8
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 11, 2023
a8a0d6d
Add `debugDescription` in WebSocket DTOs
Inokinoki Sep 12, 2023
ee11648
Fix frontend-related DTO serialization
Inokinoki Sep 12, 2023
9cc2bae
Provide a default value for debug description
Inokinoki Sep 13, 2023
6c61040
Migrate test suite execution related DTO to api-v2
Inokinoki Sep 13, 2023
e068a3f
Simply display debug description
Inokinoki Sep 13, 2023
97320b3
Revert "Migrate test suite execution related DTO to api-v2"
Inokinoki Sep 18, 2023
7271e9f
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 18, 2023
4dcc52e
Manually add debug desc to avoid large migration
Inokinoki Sep 18, 2023
a92520c
Order migration with date
Inokinoki Sep 18, 2023
aca57c2
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 18, 2023
f215ada
Polish wording and UI/UX for debug description
Inokinoki Sep 18, 2023
9b5d42a
Fix icon in debug description popup
Inokinoki Sep 18, 2023
abfb599
Override update for `TestFunction` to update debug description
Inokinoki Sep 19, 2023
05808a4
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 19, 2023
023509e
Use "push" for model insights instead of "debug"
Inokinoki Sep 19, 2023
8c89f5a
adjusted default message
rabah-khalek Sep 19, 2023
a80d34f
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 19, 2023
6d01477
Merge branch 'main' into GSK-1476-debug-description
rabah-khalek Sep 20, 2023
abea382
[GSK-1715] Filling per-test debug descriptions (#1412)
rabah-khalek Sep 20, 2023
05e9ba8
solving [GSK-1768]
rabah-khalek Sep 20, 2023
5c64a68
silenced sonar
rabah-khalek Sep 20, 2023
cc84579
silenced sonar
rabah-khalek Sep 20, 2023
a08e57d
Merge branch 'main' into GSK-1476-debug-description
andreybavt Sep 21, 2023
54fd0cd
Merge branch 'main' into GSK-1476-debug-description
rabah-khalek Sep 25, 2023
e908c80
Merge branch 'main' into GSK-1476-debug-description
rabah-khalek Sep 26, 2023
2489d7a
Merge branch 'main' of https://github.com/Giskard-AI/giskard into GSK…
Inokinoki Sep 26, 2023
b685bcd
Merge branch 'main' into GSK-1476-debug-description
rabah-khalek Sep 27, 2023
60f5246
Merge branch 'main' into GSK-1476-debug-description
rabah-khalek Sep 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions backend/src/main/java/ai/giskard/domain/TestFunction.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
package ai.giskard.domain;

import ai.giskard.domain.ml.SuiteTest;

import com.fasterxml.jackson.annotation.JsonIgnore;
import jakarta.persistence.CascadeType;
import jakarta.persistence.DiscriminatorValue;
import jakarta.persistence.Entity;
import jakarta.persistence.OneToMany;
import jakarta.persistence.*;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.lang3.builder.EqualsBuilder;
Expand All @@ -22,6 +20,8 @@ public class TestFunction extends Callable implements Serializable {
@OneToMany(mappedBy = "testFunction", cascade = CascadeType.ALL)
@JsonIgnore
private List<SuiteTest> suiteTests;
@Column
private String debugDescription;

@Override
public boolean equals(Object o) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import lombok.Getter;

@Getter
public class MLWorkerWSDatasetProcessFunctionMetaDTO extends MLWorkerWSFunctionMetaDTO {
public class MLWorkerWSDatasetProcessFunctionMetaDTO extends MLWorkerWSFunctionMetaBaseDTO {
private Boolean cellLevel;

private String columnType;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package ai.giskard.ml.dto;

import lombok.Getter;

import java.util.List;

/**
 * Base DTO holding the metadata fields shared by the function-meta DTOs
 * (extended by {@code MLWorkerWSFunctionMetaDTO} and
 * {@code MLWorkerWSDatasetProcessFunctionMetaDTO}).
 *
 * <p>Only getters are generated (Lombok {@code @Getter}); no setters or
 * constructors are declared here.
 * NOTE(review): instances are presumably populated by JSON deserialization of
 * ML worker messages - confirm against the code that reads these DTOs.
 */
@Getter
public class MLWorkerWSFunctionMetaBaseDTO implements MLWorkerWSBaseDTO {
// Identifier of the function.
private String uuid;

// Name of the function.
private String name;

// Name intended for display (presumably in the UI - TODO confirm).
private String displayName;

// Version number of the function.
private Integer version;

// Module the function belongs to.
private String module;

// Documentation text of the function.
private String doc;

// Documentation text of the module.
private String moduleDoc;

// Argument descriptors of the function.
private List<MLWorkerWSTestFunctionArgumentDTO> args;

// Tags attached to the function.
private List<String> tags;

// Source code of the function (assumed from the field name - TODO confirm).
private String code;

// Kind of callable (e.g. the Python side registers tests with type "TEST").
private String type;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,7 @@

import lombok.Getter;

import java.util.List;

@Getter
public class MLWorkerWSFunctionMetaDTO implements MLWorkerWSBaseDTO {
private String uuid;

private String name;

private String displayName;

private Integer version;

private String module;

private String doc;

private String moduleDoc;

private List<MLWorkerWSTestFunctionArgumentDTO> args;

private List<String> tags;

private String code;

private String type;
public class MLWorkerWSFunctionMetaDTO extends MLWorkerWSFunctionMetaBaseDTO {
private String debugDescription;
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,10 @@ protected TestFunction create(TestFunctionDTO dto) {
return function;
}

/**
 * Applies the generic callable update from the parent class, then copies the
 * debug description from the DTO onto the resulting entity.
 *
 * @param existingCallable the persisted test function to update
 * @param dto              the incoming test function data
 * @return the entity returned by the parent update, with its debug description
 *         set to the DTO's value
 */
@Override
protected TestFunction update(TestFunction existingCallable, TestFunctionDTO dto) {
    final TestFunction updated = super.update(existingCallable, dto);
    final String debugDescription = dto.getDebugDescription();
    updated.setDebugDescription(debugDescription);
    return updated;
}
}
5 changes: 5 additions & 0 deletions backend/src/main/java/ai/giskard/web/dto/TestFunctionDTO.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
package ai.giskard.web.dto;

import com.dataiku.j2ts.annotations.UIModel;
import com.fasterxml.jackson.annotation.JsonAlias;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;

/**
 * DTO for a test function: a {@link CallableDTO} extended with a debug
 * description.
 *
 * <p>Equality and hash code include the parent's fields
 * ({@code callSuper = true}). The class is marked {@code @UIModel}.
 */
@Data
@EqualsAndHashCode(callSuper = true)
@AllArgsConstructor
@NoArgsConstructor
@UIModel
public class TestFunctionDTO extends CallableDTO {
// The alias lets deserialization also accept the snake_case key
// "debug_description" in addition to "debugDescription".
@JsonAlias("debug_description")
private String debugDescription;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.1" encoding="UTF-8" standalone="no"?>
<databaseChangeLog xmlns="http://www.liquibase.org/xml/ns/dbchangelog" xmlns:ext="http://www.liquibase.org/xml/ns/dbchangelog-ext" xmlns:pro="http://www.liquibase.org/xml/ns/pro" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog-ext http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-ext.xsd http://www.liquibase.org/xml/ns/pro http://www.liquibase.org/xml/ns/pro/liquibase-pro-4.1.xsd http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.1.xsd">
<!--
Adds the debug_description column to callable_functions. No constraints are
declared on the column.
NOTE(review): the column is varchar(255); confirm that debug descriptions
(which may contain HTML markup) are always short enough to fit.
-->
<changeSet author="rak (generated)" id="1691072393152-5">
<addColumn tableName="callable_functions">
<column name="debug_description" type="varchar(255)"/>
</addColumn>
</changeSet>
</databaseChangeLog>
1 change: 1 addition & 0 deletions backend/src/main/resources/config/liquibase/master.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
<include file="config/liquibase/changelog/20230712135428_changelog.xml" relativeToChangelogFile="false"/>
<include file="config/liquibase/changelog/clean_old_tests.xml" relativeToChangelogFile="false"/>
<include file="config/liquibase/changelog/migrate-process_type.xml" relativeToChangelogFile="false"/>
<include file="config/liquibase/changelog/20230803161938_changelog.xml" relativeToChangelogFile="false"/>
<include file="config/liquibase/changelog/20230818143554_changelog.xml" relativeToChangelogFile="false"/>
<include file="config/liquibase/changelog/20230822143427_changelog.xml" relativeToChangelogFile="false"/>
<include file="config/liquibase/changelog/20230823101750_changelog.xml" relativeToChangelogFile="false"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ import type {CallableDTO} from './callable-dto';
/**
* Generated from ai.giskard.web.dto.TestFunctionDTO
*/
export interface TestFunctionDTO extends CallableDTO {}
export interface TestFunctionDTO extends CallableDTO {
debugDescription: string | null;
}
37 changes: 36 additions & 1 deletion frontend/src/views/main/project/SuiteTestExecutionCard.vue
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
<v-icon class='mr-1'>{{ TEST_RESULT_DATA[result.status].icon }}</v-icon>
{{ TEST_RESULT_DATA[result.status].capitalized }}
</v-chip>
<v-btn color='primary' @click='debugTest' outlined small :disabled='!canBeDebugged' :loading='loading'>
<v-btn color='primary' @click='debugDescDialog = true' outlined small :disabled='!canBeDebugged' :loading='loading'>
<v-icon small>info</v-icon>
Debug
</v-btn>
Expand Down Expand Up @@ -80,6 +80,40 @@
</v-card>
</v-dialog>
</div>
<div>
  <!-- Confirmation dialog shown before a debugging session is started for this test. -->
  <v-dialog
    v-model="debugDescDialog"
    width="auto"
  >
    <v-card>
      <v-card-title>
        Debug the {{ suiteTest.test?.displayName }} test
      </v-card-title>
      <v-card-text>
        <!--
          The debug description is rendered as raw HTML (it may contain markup such as <b>).
          NOTE(review): v-html does not escape its input; confirm the description only ever
          comes from trusted test definitions.
          Optional chaining keeps this consistent with the title above when `test` is undefined.
        -->
        <!-- //NOSONAR --><p v-html="suiteTest.test?.debugDescription"/>
        <p>
          This will enable you to:
          <ul>
            <li>Understand why the test fails by looking at the model explanation</li>
            <li>Create new tests by looking at the model insights <v-icon>mdi-alert-outline</v-icon></li>
            <li>Collect feedback to integrate domain knowledge <v-icon>mdi-comment-multiple</v-icon></li>
          </ul>
        </p>

        Start debugging?
      </v-card-text>
      <v-card-actions>
        <v-spacer></v-spacer>
        <v-btn color="error" text @click="debugDescDialog = false;">
          No
        </v-btn>
        <!-- Close the dialog first, then start the debugging session. -->
        <v-btn color="primary" text @click="debugDescDialog = false; debugTest()">
          Yes
        </v-btn>
      </v-card-actions>
    </v-card>
  </v-dialog>
</div>
</div>
</template>

Expand Down Expand Up @@ -117,6 +151,7 @@ const props = defineProps<{
const loading = ref<boolean>(false);
const modelDialog = ref<boolean>(false);
const selectedModel = ref<string>("");
const debugDescDialog = ref<boolean>(false);

const params = computed(() => props.isPastExecution && props.result
? props.result?.inputs
Expand Down
25 changes: 25 additions & 0 deletions python-client/giskard/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,11 +276,36 @@ def __repr__(self) -> str:


class TestFunctionMeta(CallableMeta):
    """Callable metadata for a test function, extended with a debug description.

    The debug description is stored on the instance, added to the JSON
    representation under the ``"debug_description"`` key, and restored from
    that key when loading from JSON.
    """

    # None when no description was supplied or when the loaded JSON has no such key.
    debug_description: Optional[str]

    def __init__(
        self,
        callable_obj: Union[Callable, Type] = None,
        name: Optional[str] = None,
        tags: List[str] = None,
        debug_description: Optional[str] = None,
        version: Optional[int] = None,
        type: str = None,
    ):
        """Initialise the base metadata and store the debug description.

        Args:
            callable_obj: The function or class being described.
            name: Optional name forwarded to ``CallableMeta``.
            tags: Optional tags forwarded to ``CallableMeta``.
            debug_description: Optional text describing the debugging session of the test.
            version: Optional version forwarded to ``CallableMeta``.
            type: Callable type forwarded to ``CallableMeta``.
        """
        super().__init__(callable_obj, name, tags, version, type)
        self.debug_description = debug_description

    def extract_parameters(self, callable_obj):
        """Return the callable's parameters as a dict keyed by parameter name.

        The parameters extracted by ``CallableMeta`` are first passed through
        ``unknown_annotations_to_kwargs``.
        """
        parameters = unknown_annotations_to_kwargs(CallableMeta.extract_parameters(self, callable_obj))

        return {p.name: p for p in parameters}

    def to_json(self):
        """Return the parent's JSON dict with ``"debug_description"`` added."""
        json = super().to_json()
        return {
            **json,
            "debug_description": self.debug_description,
        }

    def init_from_json(self, json: Dict[str, Any]):
        """Load the base fields from ``json``, then the debug description.

        A missing ``"debug_description"`` key yields ``None``.
        """
        super().init_from_json(json)
        self.debug_description = json.get("debug_description")


class DatasetProcessFunctionType(Enum):
CLAUSES = "CLAUSES"
Expand Down
11 changes: 9 additions & 2 deletions python-client/giskard/ml_worker/testing/registry/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
from giskard.ml_worker.testing.registry.giskard_test import GiskardTestMethod, GiskardTest


def test(_fn=None, name=None, tags: Optional[List[str]] = None):
def test(
_fn=None,
name=None,
tags: Optional[List[str]] = None,
debug_description: str = "This debugging session opens one by one all the examples that make the test fail.",
):
if sys.version_info >= (3, 10):
import typing as t
else:
Expand All @@ -25,7 +30,9 @@ def inner(
"""
from giskard.ml_worker.testing.registry.registry import tests_registry

tests_registry.register(TestFunctionMeta(original, name=name, tags=tags, type="TEST"))
tests_registry.register(
TestFunctionMeta(original, name=name, tags=tags, debug_description=debug_description, type="TEST")
)

if inspect.isclass(original) and issubclass(original, GiskardTest):
return original
Expand Down
1 change: 1 addition & 0 deletions python-client/giskard/ml_worker/websocket/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class FunctionMeta(BaseModel):
tags: Optional[List[str]] = None
code: Optional[str] = None
type: Optional[str] = None
debugDescription: Optional[str] = None


class DatasetProcessFunctionMeta(BaseModel):
Expand Down
1 change: 1 addition & 0 deletions python-client/giskard/ml_worker/websocket/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def map_function_meta_ws(callable_type):
)
for a in (test.args.values() if test.args else []) # args could be None
],
debugDescription=test.debug_description,
)
for test in tests_registry.get_all().values()
if test.type == callable_type
Expand Down
1 change: 1 addition & 0 deletions python-client/giskard/testing/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Prefix prepended to the test name when naming a test's debug output dataset.
debug_prefix = "Debug: "
# Common opening of per-test debug descriptions; each test appends its own ending
# (e.g. "that are <b>predicted with overconfidence</b>."). The trailing space is intentional.
debug_description_prefix = "This debugging session opens one by one all the examples "
50 changes: 25 additions & 25 deletions python-client/giskard/testing/tests/calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ...ml_worker.testing.registry.slicing_function import SlicingFunction
from ...datasets.base import Dataset
from ...models.base import BaseModel
from . import debug_prefix
from . import debug_prefix, debug_description_prefix


def _calculate_overconfidence_score(model: BaseModel, dataset: Dataset) -> pd.Series:
Expand All @@ -32,14 +32,18 @@ def _default_overconfidence_threshold(model: BaseModel) -> float:
return 1 / (3e-1 * (n - 2) + 2 - 1e-3 * (n - 2) ** 2)


@test(name="Overconfidence Rate", tags=["classification"])
@test(
name="Overconfidence Rate",
tags=["classification"],
debug_description=debug_description_prefix + "that are <b>predicted with overconfidence</b>.",
)
def test_overconfidence_rate(
model: BaseModel,
dataset: Dataset,
slicing_function: Optional[SlicingFunction] = None,
threshold: Optional[float] = 0.10,
p_threshold: Optional[float] = None,
debug: bool = False
model: BaseModel,
dataset: Dataset,
slicing_function: Optional[SlicingFunction] = None,
threshold: Optional[float] = 0.10,
p_threshold: Optional[float] = None,
debug: bool = False,
):
"""Tests that the rate of overconfident predictions is below a threshold.

Expand Down Expand Up @@ -97,11 +101,7 @@ def test_overconfidence_rate(
output_ds.name = debug_prefix + test_name
# ---

return TestResult(
passed=bool(passed),
metric=rate,
output_df=output_ds
)
return TestResult(passed=bool(passed), metric=rate, output_df=output_ds)


def _calculate_underconfidence_score(model: BaseModel, dataset: Dataset) -> pd.Series:
Expand All @@ -116,14 +116,18 @@ def _calculate_underconfidence_score(model: BaseModel, dataset: Dataset) -> pd.S
return pd.Series(score_values, index=dataset.df.index)


@test(name="Underconfidence Rate", tags=["classification"])
@test(
name="Underconfidence Rate",
tags=["classification"],
debug_description=debug_description_prefix + "that are <b>predicted with underconfidence</b>.",
)
def test_underconfidence_rate(
model: BaseModel,
dataset: Dataset,
slicing_function: Optional[SlicingFunction] = None,
threshold: Optional[float] = 0.10,
p_threshold: float = 0.90,
debug: bool = False
model: BaseModel,
dataset: Dataset,
slicing_function: Optional[SlicingFunction] = None,
threshold: Optional[float] = 0.10,
p_threshold: float = 0.90,
debug: bool = False,
):
"""Tests that the rate of underconfident predictions is below a threshold.

Expand Down Expand Up @@ -178,8 +182,4 @@ def test_underconfidence_rate(
output_ds.name = debug_prefix + test_name
# ---

return TestResult(
passed=bool(passed),
metric=rate,
output_df=output_ds
)
return TestResult(passed=bool(passed), metric=rate, output_df=output_ds)
Loading