Skip to content

Commit 077e23c

Browse files
authored
Merge pull request #49 from Giskard-AI/GSK-2864-fragment-markdown-report
Fragment issues and examples if they are too long
2 parents c2f8538 + 5812e27 commit 077e23c

File tree

2 files changed

+54
-25
lines changed

2 files changed

+54
-25
lines changed

giskard_cicd/automation/post_discussion.py

Lines changed: 53 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
import markdown
44
import re
55
from time import sleep
6-
from .utils import ISSUE_GROUPS
6+
import logging
77

8+
# Module-level logger. It is named after the module (``__name__``) rather than
# the file path so that it takes part in the dotted logger hierarchy and can
# be configured together with the rest of the package.
logger = logging.getLogger(__name__)

# URL of the Giskard Space on the Hugging Face Hub.
GISKARD_HUB_URL = "https://huggingface.co/spaces/giskardai/giskard"
910

1011
def construct_opening(dataset_id, dataset_config, dataset_split, vulnerability_count):
@@ -73,37 +74,65 @@ def save_post(report_path, path, dataset_id, dataset_config, dataset_split):
7374
with open(path, "w") as f:
7475
f.write(post)
7576

76-
77-
def separate_report_by_issues(report):
78-
# TODO: add markdown comments to the report as a split marker
79-
regex = (
80-
"\W(?="
81-
+ "|".join(["<details>\n<summary>👉" + issue for issue in ISSUE_GROUPS])
82-
+ ")"
83-
)
84-
sub_reports = re.split(regex, report)
85-
return sub_reports
86-
77+
class Issue:
    """One postable fragment of a report: a description and its examples.

    Both parts are treated as strings. ``len(issue)`` is the number of
    characters in ``description + examples``.
    """

    # Character budget for one issue (description + examples).
    MAX_LENGTH = 60000
    # Text substituted for examples that do not fit in the budget.
    TRIMMED_PLACEHOLDER = "examples are too long to be displayed"

    def __init__(self, description, examples):
        self.description = description
        self.examples = examples

    def __len__(self):
        return len(self.description) + len(self.examples)

    def __repr__(self):
        return (
            f"{type(self).__name__}("
            f"description={self.description!r}, examples={self.examples!r})"
        )

    def trim_examples(self, max_length=None):
        """Replace the examples with a placeholder when the issue is too long.

        The check uses the total length (description + examples), so an
        issue whose examples alone are under the budget but whose combined
        text is over it is trimmed as well.

        Args:
            max_length: Optional character budget; defaults to ``MAX_LENGTH``.
        """
        limit = self.MAX_LENGTH if max_length is None else max_length
        placeholder = self.TRIMMED_PLACEHOLDER
        # Only substitute when it actually shortens the issue; the description
        # is never modified, so an over-long description stays over-long.
        if len(self) > limit and len(self.examples) > len(placeholder):
            self.examples = placeholder
89+
90+
def load_report_to_issues(report):
    """Split a markdown report into a list of ``Issue`` fragments.

    The report is cut on ``<!-- issue -->`` markers. Each resulting section
    is cut again on ``<!-- examples -->`` markers: the text before the first
    marker is the first description, and every following part is split on
    ``</details>`` into alternating (examples, next description) pieces.
    Descriptions and examples are then paired in order.

    Args:
        report: Report text. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``Issue`` objects, one per (description, examples) pair.
    """
    if not report:
        return []

    split_issues = []
    # <!-- issue --> is used to separate the issues
    for issue in report.split("<!-- issue -->"):
        if not issue:
            continue
        # <!-- examples --> is used to separate the examples
        head, *example_sections = issue.split("<!-- examples -->")
        descriptions = [head]
        examples = []
        for section in example_sections:
            parts = section.split("</details>")
            # Even positions hold examples, odd positions the text following
            # them, which serves as the description of the next pair.
            for index in range(0, len(parts), 2):
                candidate = parts[index]
                # Skip fragments with fewer than 10 distinct characters
                # (this also covers the empty string).
                if len(set(candidate)) < 10:
                    continue
                examples.append(candidate)
                if index + 1 < len(parts):
                    descriptions.append(parts[index + 1])
        # NOTE(review): zip() stops at the shorter list, so a description
        # with no matching examples (including a section that has no
        # examples marker at all) is dropped -- confirm this is intended.
        # NOTE(review): the "</details>" separators consumed by split() are
        # not re-added to the fragments -- confirm rendering is acceptable.
        split_issues.extend(
            Issue(description, example)
            for description, example in zip(descriptions, examples)
        )
    return split_issues
87110

88111
def post_issue_as_comment(discussion, issue, token, repo_id):
    """Post one issue as a comment on a discussion (best effort).

    Args:
        discussion: Object whose ``num`` attribute identifies the discussion.
        issue: Either an ``Issue`` (posted as ``description + examples``) or
            a value that is passed through unchanged as the comment text.
        token: Token forwarded to ``hf_hub.comment_discussion``.
        repo_id: Repository id forwarded to ``hf_hub.comment_discussion``
            (the repo type is always ``"space"``).

    Returns:
        The value returned by ``hf_hub.comment_discussion`` on success, or
        ``None`` when posting failed.
    """
    try:
        comment = issue
        if isinstance(issue, Issue):
            comment = issue.description + issue.examples
        return hf_hub.comment_discussion(
            repo_id=repo_id,
            repo_type="space",
            discussion_num=discussion.num,
            comment=comment,
            token=token,
        )
    except Exception as e:
        # Deliberately best-effort: a failed comment is reported (with its
        # traceback) but does not abort the caller.
        logger.exception("Failed to post issue as comment: %s", e)
        return None
97125

98126

99127
def post_too_long_report_in_comments(
    discussion, report, token, repo_id, test_suite_url=None
):
    """Post a report as a series of comments, one per issue, then a closing.

    The report is split with ``load_report_to_issues``; any issue longer
    than 60000 characters has ``trim_examples()`` called on it before it is
    posted. A final comment built by ``construct_closing(test_suite_url)``
    is posted last.

    Returns:
        The ``discussion`` argument, unchanged.
    """
    for entry in load_report_to_issues(report):
        oversized = len(entry) > 60000
        if oversized:
            entry.trim_examples()
        post_issue_as_comment(discussion, entry, token, repo_id)
        # Pause one second between consecutive comments.
        sleep(1)

    closing = construct_closing(test_suite_url)
    post_issue_as_comment(discussion, closing, token, repo_id)
    return discussion
109138

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ version = "0.2.0"
1111
readme = "README.md"
1212
dependencies = [
1313
"datasets",
14-
"giskard >= 2.3.0",
14+
"giskard >= 2.7.3",
1515
"huggingface_hub",
1616
"torch",
1717
"transformers",

0 commit comments

Comments
 (0)