Skip to content

Commit d74419f

Browse files
committed
Fix to_checksums with None values in dicts and recursion
Having a `'src': None` entry in a dict for checksums is as valid as having a `None` entry directly in the list. However the current function didn't handle it and crashed. Fix that as well as a few corner cases especially in the recursive case by introducing a new function for handling checksum entries in the checksum list and limiting the recursiveness. Fixes #4142
1 parent 2281945 commit d74419f

2 files changed

Lines changed: 135 additions & 38 deletions

File tree

easybuild/framework/easyconfig/types.py

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -510,33 +510,51 @@ def to_dependencies(dep_list):
510510
return [to_dependency(dep) for dep in dep_list]
511511

512512

513-
def to_checksums(checksums):
514-
"""Ensure correct element types for list of checksums: convert list elements to tuples."""
515-
res = []
516-
for checksum in checksums:
517-
# each list entry can be:
518-
# * None (indicates no checksum)
519-
# * a string (SHA256 checksum)
520-
# * a tuple with 2 elements: checksum type + checksum value
521-
# * a list of checksums (i.e. multiple checksums for a single file)
522-
# * a dict (filename to checksum mapping)
523-
if isinstance(checksum, str):
524-
res.append(checksum)
525-
elif isinstance(checksum, (list, tuple)):
526-
# 2 elements + only string/int values => a checksum tuple
527-
if len(checksum) == 2 and all(isinstance(x, (str, int)) for x in checksum):
528-
res.append(tuple(checksum))
513+
def _to_checksum(checksum, list_level=0, allow_dict=True):
514+
"""Ensure the correct element type for each checksum in the checksum list"""
515+
# each entry can be:
516+
# * None (indicates no checksum)
517+
# * a string (SHA256 checksum)
518+
# * a list or tuple with 2 elements: checksum type + checksum value
519+
# * a list or tuple of checksums (i.e. multiple checksums for a single file)
520+
# * a dict (filename to checksum mapping)
521+
if checksum is None or isinstance(checksum, str):
522+
return checksum
523+
elif isinstance(checksum, (list, tuple)):
524+
if len(checksum) == 2 and isinstance(checksum[0], str) and isinstance(checksum[1], (str, int)):
525+
# 2 elements so either:
526+
# - a checksum tuple (2nd element string or int)
527+
# - 2 alternative checksums (tuple)
528+
# - 2 checksums that must each match (list)
529+
# --> Convert to tuple only if we can exclude the 3rd case
530+
if not isinstance(checksum[1], str) or list_level > 0:
531+
return tuple(checksum)
529532
else:
530-
res.append(to_checksums(checksum))
531-
elif isinstance(checksum, dict):
532-
validated_dict = {}
533-
for key, value in checksum.items():
534-
validated_dict[key] = to_checksums(value)
535-
res.append(validated_dict)
536-
else:
537-
res.append(checksum)
533+
return checksum
534+
elif list_level < 2:
535+
# Alternative checksums or multiple checksums for a single file
536+
# Allowed to nest (at most) 2 times, e.g. [[[type, value]]] == [[(type, value)]]
537+
# None is not allowed here
538+
if any(x is None for x in checksum):
539+
raise ValueError('Unexpected None in ' + str(checksum))
540+
if isinstance(checksum, tuple) or list_level > 0:
541+
# When we already are in a tuple no further recursion is allowed -> set list_level very high
542+
return tuple(_to_checksum(x, list_level=99, allow_dict=allow_dict) for x in checksum)
543+
else:
544+
return list(_to_checksum(x, list_level=list_level+1, allow_dict=allow_dict) for x in checksum)
545+
elif isinstance(checksum, dict) and allow_dict:
546+
return {key: _to_checksum(value, allow_dict=False) for key, value in checksum.items()}
538547

539-
return res
548+
# Not returned -> Wrong type/format
549+
raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum)))
550+
551+
552+
def to_checksums(checksums):
553+
"""Ensure correct element types for list of checksums: convert list elements to tuples."""
554+
try:
555+
return [_to_checksum(checksum) for checksum in checksums]
556+
except ValueError as e:
557+
raise EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, e)
540558

541559

542560
def ensure_iterable_license_specs(specs):

test/framework/type_checking.py

Lines changed: 92 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -171,18 +171,18 @@ def test_check_type_of_param_value_sanity_check_paths(self):
171171
out = {'files': ['bin/foo', ('bin/bar', 'bin/baz')], 'dirs': [('lib', 'lib64', 'lib32')]}
172172
self.assertEqual(check_type_of_param_value('sanity_check_paths', inp, auto_convert=True), (True, out))
173173

174-
def test_check_type_of_param_value_checksums(self):
175-
"""Test check_type_of_param_value function for checksums."""
174+
@staticmethod
175+
def get_valid_checksums_values():
176+
"""Return list of values valid for the 'checksums' EC parameter"""
176177

177178
# Using (actually invalid) prefix to better detect those in case of errors
178179
md5_checksum = 'md518be8435447a017fd1bf2c7ae9224'
179180
sha256_checksum1 = 'sha18be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265'
180181
sha256_checksum2 = 'sha2cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551'
181182
sha256_checksum3 = 'sha3e54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde'
182183
filesize = 45617379
183-
184184
# valid values for 'checksums' easyconfig parameters
185-
inputs = [
185+
return [
186186
[],
187187
# single checksum (one file)
188188
[md5_checksum],
@@ -239,7 +239,11 @@ def test_check_type_of_param_value_checksums(self):
239239
{'foo.txt': sha256_checksum1, 'bar.txt': None},
240240
],
241241
]
242-
for inp in inputs:
242+
243+
def test_check_type_of_param_value_checksums(self):
244+
"""Test check_type_of_param_value function for checksums."""
245+
246+
for inp in TypeCheckingTest.get_valid_checksums_values():
243247
type_ok, newval = check_type_of_param_value('checksums', inp)
244248
self.assertIs(type_ok, True, 'Failed for ' + str(inp))
245249
self.assertEqual(newval, inp)
@@ -724,19 +728,94 @@ def test_to_sanity_check_paths_dict(self):
724728

725729
def test_to_checksums(self):
726730
"""Test to_checksums function."""
731+
# Some hand-crafted examples. Only the types are important, values are for easier verification
727732
test_inputs = [
728-
['be662daa971a640e40be5c804d9d7d10'],
729-
['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')],
730-
[['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]],
731-
[('md5', 'be662daa971a640e40be5c804d9d7d10')],
732-
['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'),
733-
('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
734-
('size', 273)],
733+
['checksumvalue'],
734+
[('md5', 'md5checksumvalue')],
735+
['file_1_checksum', ('md5', 'file_2_md5_checksum')],
736+
# One checksum per file, some with checksum type
737+
[
738+
'be662daa971a640e40be5c804d9d7d10',
739+
('adler32', '0x998410035'),
740+
('crc32', '0x1553842328'),
741+
('md5', 'be662daa971a640e40be5c804d9d7d10'),
742+
('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
743+
# int type as the 2nd value
744+
('size', 273),
745+
],
735746
# None values should not be filtered out, but left in place
736-
[None, 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None],
747+
[None, 'checksum', None],
748+
# Alternative checksums, not to be confused with multiple checksums for a file
749+
[('main_checksum', 'alternative_checksum')],
750+
[('1st_of_3', '2nd_of_3', '3rd_of_3')],
751+
# Lists must be kept: This means all must match
752+
[['checksum_1_in_list']],
753+
[['checksum_must_match', 'this_must_also_match']],
754+
[['1st_of_3_list', '2nd_of_3_list', '3rd_of_3_list']],
755+
# Alternative checksums with types
756+
[
757+
(('adler32', '1st_adler'), ('crc32', '1st_crc')),
758+
(('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')),
759+
],
760+
# Entries can be dicts even containing `None`
761+
[
762+
{
763+
'src-arm.tgz': 'arm_checksum',
764+
'src-x86.tgz': ('mainchecksum', 'altchecksum'),
765+
'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')),
766+
'git-clone.tgz': None,
767+
},
768+
{
769+
'src': ['checksum_must_match', 'this_must_also_match']
770+
},
771+
# 2nd required checksum a dict
772+
['first_checksum', {'src-arm': 'arm_checksum'}]
773+
],
737774
]
738775
for checksums in test_inputs:
739776
self.assertEqual(to_checksums(checksums), checksums)
777+
# Also reuse the checksums we use in test_check_type_of_param_value_checksums
778+
# When a checksum is valid it must not be modified
779+
for checksums in TypeCheckingTest.get_valid_checksums_values():
780+
self.assertEqual(to_checksums(checksums), checksums)
781+
782+
# List in list converted to tuple -> alternatives or checksum with type
783+
checksums = [['1stchecksum', ['md5', 'md5sum']]]
784+
checksums_expected = [['1stchecksum', ('md5', 'md5sum')]]
785+
self.assertEqual(to_checksums(checksums), checksums_expected)
786+
787+
# Error detection
788+
wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))]
789+
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting)
790+
correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]
791+
self.assertEqual(to_checksums(correct_nesting), correct_nesting)
792+
# YEB (YAML EC) doesn't has tuples so it uses lists instead which need to get converted
793+
correct_nesting_yeb = [[['1stchecksum', ['md5', 'md5sum'], ['md5', 'altmd5sum']]]]
794+
correct_nesting_yeb_conv = [[('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]]
795+
self.assertEqual(to_checksums(correct_nesting_yeb), correct_nesting_yeb_conv)
796+
self.assertEqual(to_checksums(correct_nesting_yeb_conv), correct_nesting_yeb_conv)
797+
798+
unexpected_set = [('1stchecksum', {'md5', 'md5sum'})]
799+
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set)
800+
unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}]
801+
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict)
802+
correct_dict = [{'src': ('md5sum', 'shasum')}]
803+
self.assertEqual(to_checksums(correct_dict), correct_dict)
804+
correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}]
805+
correct_dict_2 = [{'src': [('md5', 'md5sum'), ('sha', 'shasum')]}]
806+
self.assertEqual(to_checksums(correct_dict_2), correct_dict_2)
807+
self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # inner lists to tuples
808+
809+
unexpected_Nones = [
810+
[('1stchecksum', None)],
811+
[['1stchecksum', None]],
812+
[{'src': ('md5sum', None)}],
813+
[{'src': ['md5sum', None]}],
814+
]
815+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[0])
816+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[1])
817+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[2])
818+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[3])
740819

741820
def test_ensure_iterable_license_specs(self):
742821
"""Test ensure_iterable_license_specs function."""

0 commit comments

Comments
 (0)