Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 53 additions & 24 deletions giskard_cicd/automation/post_discussion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import markdown
import re
from time import sleep
from .utils import ISSUE_GROUPS
import logging

# Name the logger after the module (not its file path) so it takes part in
# the standard dotted-name logging hierarchy and can be configured by package.
logger = logging.getLogger(__name__)
# URL of the Giskard Space on the Hugging Face Hub.
GISKARD_HUB_URL = "https://huggingface.co/spaces/giskardai/giskard"

def construct_opening(dataset_id, dataset_config, dataset_split, vulnerability_count):
Expand Down Expand Up @@ -73,37 +74,65 @@ def save_post(report_path, path, dataset_id, dataset_config, dataset_split):
with open(path, "w") as f:
f.write(post)


def separate_report_by_issues(report):
    """Split a report into sub-reports, one per issue-group section.

    The report is cut at the single non-word character that immediately
    precedes a ``<details>\\n<summary>👉<issue group>`` header, for every
    name in ``ISSUE_GROUPS``.

    Args:
        report: The full report text.

    Returns:
        A list of report fragments; ``[report]`` when no issue groups are
        configured.
    """
    # TODO: add markdown comments to the report as a split marker
    if not ISSUE_GROUPS:
        # An empty alternation would make the lookahead match everywhere and
        # split the report on every non-word character.
        return [report]
    # Issue names are literal text, so escape them before building the regex.
    headers = "|".join(
        "<details>\n<summary>👉" + re.escape(issue) for issue in ISSUE_GROUPS
    )
    # Raw string: "\W" is an invalid escape sequence in a plain literal.
    pattern = r"\W(?=" + headers + ")"
    return re.split(pattern, report)

class Issue:
    """A report issue held as a description text and an examples text.

    Attributes:
        description: Text describing the issue.
        examples: Text holding the examples attached to the issue.
    """

    # Default number of characters above which the examples are replaced.
    MAX_EXAMPLES_LENGTH = 60000
    # Text substituted for examples that exceed the limit.
    TRIMMED_PLACEHOLDER = "examples are too long to be displayed"

    def __init__(self, description, examples):
        self.description = description
        self.examples = examples

    def __len__(self):
        """Return the combined character count of description and examples."""
        return len(self.description) + len(self.examples)

    def __repr__(self):
        return (
            f"{type(self).__name__}(description={self.description!r}, "
            f"examples={self.examples!r})"
        )

    def trim_examples(self, max_length=None):
        """Replace the examples with a placeholder when they are too long.

        Only the length of ``examples`` is compared with the limit; the
        description is never modified.

        Args:
            max_length: Maximum allowed number of characters in ``examples``.
                Defaults to ``MAX_EXAMPLES_LENGTH`` when omitted.
        """
        limit = self.MAX_EXAMPLES_LENGTH if max_length is None else max_length
        if len(self.examples) > limit:
            self.examples = self.TRIMMED_PLACEHOLDER

def load_report_to_issues(report):
    """Parse a report into a list of ``Issue`` objects.

    The report is first cut on ``<!-- issue -->`` markers. Each non-empty
    piece is cut again on ``<!-- examples -->``: the text before the first
    marker is the first description, and every following section is split on
    ``</details>``. In each such section, even-indexed chunks are treated as
    examples and the chunk right after an accepted example becomes the next
    description. Descriptions and examples are then paired in order; any
    unpaired trailing entry is dropped.

    Args:
        report: The full report text.

    Returns:
        A list of ``Issue`` objects in report order (empty when the report
        contains no usable examples).
    """
    parsed_issues = []
    # <!-- issue --> is used to separate the issues
    for raw_issue in report.split("<!-- issue -->"):
        if not raw_issue:
            continue
        # <!-- examples --> is used to separate the examples
        head, *example_sections = raw_issue.split("<!-- examples -->")
        descriptions = [head]
        examples = []
        for section in example_sections:
            chunks = section.split("</details>")
            for i in range(0, len(chunks), 2):
                chunk = chunks[i]
                # Skip chunks with fewer than 10 distinct characters; this
                # also covers empty chunks.
                if len(set(chunk)) < 10:
                    continue
                examples.append(chunk)
                if i + 1 < len(chunks):
                    descriptions.append(chunks[i + 1])
        parsed_issues.extend(
            Issue(description, example)
            for description, example in zip(descriptions, examples)
        )
    return parsed_issues

def post_issue_as_comment(discussion, issue, token, repo_id):
    """Post one comment on a discussion, logging instead of raising on failure.

    Args:
        discussion: Object exposing the target discussion number as ``.num``.
        issue: Either an ``Issue`` (posted as description followed by
            examples) or any other value, which is passed through as the
            comment body unchanged.
        token: Token forwarded to ``hf_hub.comment_discussion``.
        repo_id: Identifier of the repository; always posted with
            ``repo_type="space"``.

    Returns:
        Whatever ``hf_hub.comment_discussion`` returns, or ``None`` when
        posting failed.
    """
    # NOTE(review): hf_hub is presumably the huggingface_hub module imported
    # above this view -- confirm.
    try:
        comment = issue
        if isinstance(issue, Issue):
            comment = issue.description + issue.examples
        return hf_hub.comment_discussion(
            repo_id=repo_id,
            repo_type="space",
            discussion_num=discussion.num,
            comment=comment,
            token=token,
        )
    except Exception as e:
        # Best effort: one failed comment must not abort the remaining posts,
        # but the failure should be visible at the default log level.
        logger.warning("Failed to post issue as comment: %s", e)
        return None


def post_too_long_report_in_comments(
    discussion, report, token, repo_id, test_suite_url=None
):
    """Post a report as a series of comments, one per issue, then a closing.

    The report is parsed with ``load_report_to_issues``; each issue whose
    total length exceeds 60000 characters has ``trim_examples()`` applied
    before being posted. A final comment built by ``construct_closing`` is
    posted last.

    Args:
        discussion: Discussion to comment on.
        report: The full report text.
        token: Token forwarded to the posting helper.
        repo_id: Identifier of the repository holding the discussion.
        test_suite_url: Optional URL passed to ``construct_closing``.

    Returns:
        The ``discussion`` argument, unchanged.
    """
    for issue in load_report_to_issues(report):
        # NOTE(review): trim_examples() only replaces the examples when the
        # examples alone exceed its limit, so an issue with a very long
        # description can still be over 60000 characters here.
        if len(issue) > 60000:
            issue.trim_examples()
        post_issue_as_comment(discussion, issue, token, repo_id)

        # NOTE(review): pause between consecutive posts; presumably to avoid
        # hitting API rate limits -- confirm.
        sleep(1)
    post_issue_as_comment(discussion, construct_closing(test_suite_url), token, repo_id)
    return discussion

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ version = "0.2.0"
readme = "README.md"
dependencies = [
"datasets",
"giskard >= 2.3.0",
"giskard >= 2.7.3",
"huggingface_hub",
"torch",
"transformers",
Expand Down