Skip to content

Commit dd3b21c

Browse files
committed
Add support for admin-only evaluation feedback.
Admin-only feedback is either generated automatically by the white diff comparison step, or supplied by the checker by writing an additional line to stderr that starts with `ADMIN_MESSAGE:`.
1 parent fcb1905 commit dd3b21c

11 files changed

Lines changed: 189 additions & 153 deletions

File tree

cms/db/submission.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,9 @@ class Evaluation(Base):
766766
nullable=False,
767767
default=[])
768768

769+
# Admin-facing output from the grader.
770+
admin_text: str | None = Column(String, nullable=True, default=None)
771+
769772
# Evaluation's time and wall-clock time, in seconds.
770773
execution_time: float | None = Column(
771774
Float,

cms/grading/Job.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def __init__(
9393
info: str | None = None,
9494
success: bool | None = None,
9595
text: list[str] | None = None,
96+
admin_text: str | None = None,
9697
files: dict[str, File] | None = None,
9798
managers: dict[str, Manager] | None = None,
9899
executables: dict[str, Executable] | None = None,
@@ -121,6 +122,8 @@ def __init__(
121122
to be presented to the user. The first item is a string,
122123
potentially with %-escaping; the following items are the
123124
values to be %-formatted into the first.
125+
admin_text: description of the outcome of the job,
126+
to be shown to admins.
124127
files: files submitted by the user.
125128
managers: managers provided by the admins.
126129
executables: executables created in the compilation.
@@ -155,6 +158,7 @@ def __init__(
155158

156159
self.success = success
157160
self.text = text
161+
self.admin_text = admin_text
158162

159163
self.files = files
160164
self.managers = managers
@@ -178,6 +182,7 @@ def export_to_dict(self) -> dict:
178182
'info': self.info,
179183
'success': self.success,
180184
'text': self.text,
185+
'admin_text': self.admin_text,
181186
'files': dict((k, v.digest)
182187
for k, v in self.files.items()),
183188
'managers': dict((k, v.digest)
@@ -316,6 +321,7 @@ def __init__(
316321
compilation_success: bool | None = None,
317322
executables: dict[str, Executable] | None = None,
318323
text: list[str] | None = None,
324+
admin_text: str | None = None,
319325
plus: dict | None = None,
320326
):
321327
"""Initialization.
@@ -331,7 +337,7 @@ def __init__(
331337
Job.__init__(self, operation, task_type, task_type_parameters,
332338
language, multithreaded_sandbox, archive_sandbox,
333339
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
334-
text, files, managers, executables)
340+
text, admin_text, files, managers, executables)
335341
self.compilation_success = compilation_success
336342
self.plus = plus
337343

@@ -537,6 +543,7 @@ def __init__(
537543
success: bool | None = None,
538544
outcome: str | None = None,
539545
text: list[str] | None = None,
546+
admin_text: list[str] | None = None,
540547
user_output: str | None = None,
541548
plus: dict | None = None,
542549
only_execution: bool | None = False,
@@ -567,7 +574,7 @@ def __init__(
567574
Job.__init__(self, operation, task_type, task_type_parameters,
568575
language, multithreaded_sandbox, archive_sandbox,
569576
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
570-
text, files, managers, executables)
577+
text, admin_text, files, managers, executables)
571578
self.input = input
572579
self.output = output
573580
self.time_limit = time_limit
@@ -653,6 +660,7 @@ def to_submission(self, sr: SubmissionResult):
653660

654661
sr.evaluations += [Evaluation(
655662
text=self.text,
663+
admin_text=self.admin_text,
656664
outcome=self.outcome,
657665
execution_time=self.plus.get('execution_time'),
658666
execution_wall_clock_time=self.plus.get(

cms/grading/scoretypes/abc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ def get_html_details(
144144
translation=translation,
145145
gettext=_, ngettext=n_)
146146
except Exception:
147-
logger.error("Found an invalid score details string. "
148-
"Try invalidating scores.")
147+
logger.exception("Found an invalid score details string. "
148+
"Try invalidating scores.")
149149
return _("Score details temporarily unavailable.")
150150

151151
@abstractmethod

cms/grading/steps/trusted.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,14 @@ def _sanitize_message(string: str) -> str:
7777
return string.replace('%', '%%')
7878

7979

80-
def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
80+
def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str], str]:
8181
"""Extract the outcome and the text from a standard manager output.
8282
8383
sandbox: the sandbox whose last execution was a manager writing
8484
a standard manager output.
8585
86-
return: outcome and text.
86+
return: outcome, contestant-facing text and admin-facing text
87+
(not translated).
8788
8889
raise (ValueError): if cannot decode the data.
8990
raise (FileNotFoundError): if any of the sandbox stdout or stderr file
@@ -108,6 +109,23 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
108109
logger.error("Manager stderr (text) is malformed. %r", error)
109110
raise error
110111

112+
# Parse special commands
113+
admin_text = None
114+
for line in stderr_file.readlines():
115+
line = line.strip()
116+
if not line:
117+
continue
118+
119+
PREFIX = "ADMIN_MESSAGE:"
120+
if line.startswith(PREFIX):
121+
line = _sanitize_message(line[len(PREFIX):].strip())
122+
if admin_text is not None:
123+
admin_text = admin_text + " " + line
124+
else:
125+
admin_text = line
126+
else:
127+
logger.warning(f"Unknown special manager command `{line}`")
128+
111129
try:
112130
outcome = float(outcome)
113131
except ValueError:
@@ -125,7 +143,7 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
125143
logger.warning("Manager asked to translate text, but string "
126144
"'%s' is not recognized." % remaining)
127145

128-
return outcome, [text]
146+
return outcome, [text], admin_text
129147

130148

131149
def trusted_step(
@@ -213,7 +231,8 @@ def checker_step(
213231
extra_args: extra arguments to pass to the checker.
214232
215233
return: success (true if the checker was able to check the solution
216-
successfully), outcome and text (both None if success is False).
234+
successfully), outcome, text and admin_text (all None if success
235+
is False).
217236
218237
"""
219238
# Check that the file we are going to inject in the sandbox are not already
@@ -224,12 +243,12 @@ def checker_step(
224243
if sandbox.file_exists(filename):
225244
logger.error("File %s already in the sandbox for the checker.",
226245
filename)
227-
return False, None, None
246+
return False, None, None, None
228247

229248
# Copy the checker in the sandbox, after making sure it was provided.
230249
if checker_digest is None:
231250
logger.error("Configuration error: missing checker in task managers.")
232-
return False, None, None
251+
return False, None, None, None
233252
sandbox.create_file_from_storage(CHECKER_FILENAME, checker_digest,
234253
executable=True)
235254

@@ -247,17 +266,17 @@ def checker_step(
247266
if not box_success or not success:
248267
logger.error("Sandbox failed during checker step. "
249268
"See previous logs for the reason.")
250-
return False, None, None
269+
return False, None, None, None
251270

252271
# Extract outcome and text assuming a standard manager output.
253272
try:
254-
outcome, text = extract_outcome_and_text(sandbox)
273+
outcome, text, admin_text = extract_outcome_and_text(sandbox)
255274
except ValueError as e:
256275
logger.error("Invalid output from checker: %s", e)
257-
return False, None, None
276+
return False, None, None, None
258277
except FileNotFoundError as e:
259278
# This should not happen, as the redirect is handled by the sandbox.
260279
logger.error("Missing stdout or stderr file from checker: %s", e)
261-
return False, None, None
280+
return False, None, None, None
262281

263-
return True, outcome, text
282+
return True, outcome, text, admin_text

cms/grading/steps/whitediff.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,28 +89,35 @@ def _white_diff(output: typing.BinaryIO, res: typing.BinaryIO) -> bool:
8989
9090
"""
9191

92+
line = 0
93+
9294
while True:
9395
lout = output.readline()
9496
lres = res.readline()
97+
line += 1
9598

9699
# Both files finished: comparison succeeded
97100
if len(lres) == 0 and len(lout) == 0:
98-
return True
101+
return True, None
99102

100103
# Only one file finished: ok if the other contains only blanks
101104
elif len(lres) == 0 or len(lout) == 0:
102105
lout = lout.strip(b''.join(_WHITES))
103106
lres = lres.strip(b''.join(_WHITES))
104-
if len(lout) > 0 or len(lres) > 0:
105-
return False
107+
if len(lout) > 0:
108+
return False, "Contestant output too long"
109+
if len(lres) > 0:
110+
return False, "Contestant output too short"
106111

107112
# Both files still have lines to go: ok if they agree except
108113
# for the number of whitespaces
109114
else:
110115
lout = _white_diff_canonicalize(lout)
111116
lres = _white_diff_canonicalize(lres)
112117
if lout != lres:
113-
return False
118+
lout = lout.decode("utf-8", errors='backslashreplace')
119+
lres = lres.decode("utf-8", errors='backslashreplace')
120+
return False, f"Expected `{lres}`, found `{lout}` on line {line}"
114121

115122

116123
def white_diff_fobj_step(
@@ -129,10 +136,11 @@ def white_diff_fobj_step(
129136
return: the outcome as above and a description text.
130137
131138
"""
132-
if _white_diff(output_fobj, correct_output_fobj):
133-
return 1.0, [EVALUATION_MESSAGES.get("success").message]
139+
correct, admin_text = _white_diff(output_fobj, correct_output_fobj)
140+
if correct:
141+
return 1.0, [EVALUATION_MESSAGES.get("success").message], admin_text
134142
else:
135-
return 0.0, [EVALUATION_MESSAGES.get("wrong").message]
143+
return 0.0, [EVALUATION_MESSAGES.get("wrong").message], admin_text
136144

137145

138146
def white_diff_step(

cms/grading/tasktypes/Batch.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, st
367367
if box_success:
368368
assert (output_file_params is None) == (outcome is not None)
369369
if output_file_params is not None:
370-
box_success, outcome, text = eval_output(
370+
box_success, outcome, text, admin_text = eval_output(
371371
file_cacher, job,
372372
self.CHECKER_CODENAME
373373
if self._uses_checker() else None,
@@ -378,6 +378,7 @@ def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, st
378378
job.outcome = str(outcome) if outcome is not None else None
379379
job.text = text
380380
job.plus = stats
381+
job.admin_text = admin_text
381382

382383
if sandbox is not None:
383384
delete_sandbox(sandbox, job)

cms/grading/tasktypes/Communication.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ def evaluate(self, job, file_cacher):
415415

416416
# Otherwise, we use the manager to obtain the outcome.
417417
else:
418-
outcome, text = extract_outcome_and_text(sandbox_mgr)
418+
outcome, text, admin_text = extract_outcome_and_text(sandbox_mgr)
419419

420420
# If asked so, save the output file with additional information,
421421
# provided that it exists.
@@ -433,6 +433,7 @@ def evaluate(self, job, file_cacher):
433433
job.outcome = "%s" % outcome if outcome is not None else None
434434
job.text = text
435435
job.plus = stats_user
436+
job.admin_text = admin_text
436437

437438
delete_sandbox(sandbox_mgr, job)
438439
for s in sandbox_user:

cms/grading/tasktypes/OutputOnly.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def evaluate(self, job, file_cacher):
124124
return
125125

126126
# First and only step: eval the user output.
127-
box_success, outcome, text = eval_output(
127+
box_success, outcome, text, admin_text = eval_output(
128128
file_cacher, job,
129129
OutputOnly.CHECKER_CODENAME if self._uses_checker() else None,
130130
user_output_digest=job.files[user_output_filename].digest)
@@ -133,5 +133,6 @@ def evaluate(self, job, file_cacher):
133133
job.success = box_success
134134
job.outcome = str(outcome) if outcome is not None else None
135135
job.text = text
136+
job.admin_text = admin_text
136137
# There is no actual evaluation, so no statistics.
137138
job.plus = {} if box_success else None

cms/grading/tasktypes/TwoSteps.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def evaluate(self, job, file_cacher):
333333

334334
# Otherwise evaluate the output file.
335335
else:
336-
box_success, outcome, text = eval_output(
336+
box_success, outcome, text, admin_text = eval_output(
337337
file_cacher, job,
338338
TwoSteps.CHECKER_CODENAME
339339
if self._uses_checker() else None,
@@ -344,6 +344,7 @@ def evaluate(self, job, file_cacher):
344344
job.success = box_success
345345
job.outcome = str(outcome) if outcome is not None else None
346346
job.text = text
347+
job.admin_text = admin_text
347348
job.plus = stats
348349

349350
delete_sandbox(first_sandbox, job)

cms/grading/tasktypes/util.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def eval_output(
221221
user_output_digest: str | None = None,
222222
user_output_filename: str = "",
223223
extra_args: list[str] | None = None
224-
) -> tuple[bool, float | None, list[str] | None]:
224+
) -> tuple[bool, float | None, list[str] | None, str]:
225225
"""Evaluate ("check") a user output using a white diff or a checker.
226226
227227
file_cacher: file cacher to use to get files.
@@ -237,8 +237,8 @@ def eval_output(
237237
extra_args: additional arguments to pass to the checker
238238
239239
return: tuple of success (true if the checker was
240-
able to check the solution successfully), outcome and text (both None
241-
if success is False).
240+
able to check the solution successfully), outcome, text and admin_text
241+
(all None if success is False).
242242
243243
"""
244244
if (user_output_path is None) == (user_output_digest is None):
@@ -256,7 +256,7 @@ def eval_output(
256256

257257
if checker_codename is not None:
258258
if not check_manager_present(job, checker_codename):
259-
return False, None, None
259+
return False, None, None, None
260260

261261
# Create a brand-new sandbox just for checking.
262262
sandbox = create_sandbox(file_cacher, name="check")
@@ -275,12 +275,12 @@ def eval_output(
275275

276276
checker_digest = job.managers[checker_codename].digest \
277277
if checker_codename in job.managers else None
278-
success, outcome, text = checker_step(
278+
success, outcome, text, admin_text = checker_step(
279279
sandbox, checker_digest, job.input, job.output,
280280
EVAL_USER_OUTPUT_FILENAME, extra_args)
281281

282282
delete_sandbox(sandbox, job, success)
283-
return success, outcome, text
283+
return success, outcome, text, admin_text
284284

285285
else:
286286
if user_output_path is not None:
@@ -289,6 +289,6 @@ def eval_output(
289289
user_output_fobj = file_cacher.get_file(user_output_digest)
290290
with user_output_fobj:
291291
with file_cacher.get_file(job.output) as correct_output_fobj:
292-
outcome, text = white_diff_fobj_step(
292+
outcome, text, admin_text = white_diff_fobj_step(
293293
user_output_fobj, correct_output_fobj)
294-
return True, outcome, text
294+
return True, outcome, text, admin_text

0 commit comments

Comments
 (0)