Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion easybuild/tools/build_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def init_logging(logfile, logtostdout=False, silent=False, colorize=fancylogger.
os.close(fd)

fancylogger.logToFile(logfile, max_bytes=0)
print_msg('temporary log file in case of crash %s' % (logfile), log=None, silent=silent)
print_msg('Temporary log file in case of crash %s' % (logfile), log=None, silent=silent)

log = fancylogger.getLogger(fname=False)

Expand Down
1 change: 1 addition & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
'filter_env_vars',
'hide_deps',
'hide_toolchains',
'http_header_fields_urlpat',
'force_download',
'from_pr',
'git_working_dirs_path',
Expand Down
83 changes: 83 additions & 0 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,70 @@ def derive_alt_pypi_url(url):
return alt_pypi_url


def parse_http_header_fields_urlpat(arg, urlpat=None, header=None, urlpat_headers={}, maxdepth=3):
"""
Recurse into [URLPAT::][HEADER: ]FILE|FIELD where FILE may be another such string or
file containing lines matching the same format, and flatten the result as a dict
e.g. {'^example.com': ['Authorization: Basic token', 'User-Agent: Special Agent']}
"""
# stop infinite recursion that might happen if a file.txt refers to itself
if maxdepth < 0:
_log.debug("Failed to parse_http_header_fields_urlpat (recursion limit)")
return urlpat_headers

if not isinstance(arg, str):
_log.debug("Failed to parse_http_header_fields_urlpat (argument not a string)")
return urlpat_headers

# HTTP header fields are separated by CRLF but splitting on LF is more convenient
for argline in arg.split('\n'):
argline = argline.strip() # remove optional whitespace (e.g. remaining CR)
if argline == '' or '#' in argline[0]:
continue # permit comment lines: ignore them

if os.path.isfile(os.path.join(os.getcwd(), argline)):
# expand existing relative path to absolute
argline = os.path.join(os.path.join(os.getcwd(), argline))
if os.path.isfile(argline):
# argline is a file path, so read that instead
_log.debug('File included in parse_http_header_fields_urlpat: %s' % argline)
argline = read_file(argline)
urlpat_headers = parse_http_header_fields_urlpat(argline, urlpat, header, urlpat_headers, maxdepth - 1)
continue

# URL pattern is separated by '::' from a HTTP header field
if '::' in argline:
[urlpat, argline] = argline.split('::', 1) # get the urlpat
# the remainder may be another parseable argument, recurse with same depth
urlpat_headers = parse_http_header_fields_urlpat(argline, urlpat, header, urlpat_headers, maxdepth)
continue

# Header field has format HEADER: FIELD, and FIELD may be another parseable argument
# except if FIELD contains colons, then argline is the final HEADER: FIELD to be returned
if ':' in argline and argline.count(':') == 1:
[argheader, argline] = argline.split(':', 1) # get the header and the remainder
# the remainder may be another parseable argument, recurse with same depth
# note that argheader would be forgotten in favor of the urlpat_headers returned by recursion,
# so pass on the header for reconstruction just in case there was nothing to recurse in
urlpat_headers = parse_http_header_fields_urlpat(argline, urlpat, argheader, urlpat_headers, maxdepth)
continue

if header is not None:
# parent caller didn't want to forget about the header, reconstruct as recursion stops here.
argline = header.strip() + ': ' + argline

if urlpat is not None:
if urlpat in urlpat_headers.keys():
urlpat_headers[urlpat].append(argline) # add headers to the list
else:
urlpat_headers[urlpat] = list([argline]) # new list headers for this urlpat
else:
_log.warning("Non-empty argument to http-header-fields-urlpat ignored (missing URL pattern)")

# return a dict full of {urlpat: [list, of, headers]}
return urlpat_headers


def download_file(filename, url, path, forced=False):
"""Download a file from the given URL, to the specified path."""

Expand All @@ -581,6 +645,15 @@ def download_file(filename, url, path, forced=False):
timeout = 10
_log.debug("Using timeout of %s seconds for initiating download" % timeout)

# parse option HTTP header fields for URLs containing a pattern
http_header_fields_urlpat = build_option('http_header_fields_urlpat')
# compile a dict full of {urlpat: [header, list]}
urlpat_headers = dict()
if http_header_fields_urlpat is not None:
# there may be multiple options given, parse them all, while updating urlpat_headers
for arg in http_header_fields_urlpat:
urlpat_headers = parse_http_header_fields_urlpat(arg, urlpat_headers)

# make sure directory exists
basedir = os.path.dirname(path)
mkdir(basedir, parents=True)
Expand All @@ -592,6 +665,16 @@ def download_file(filename, url, path, forced=False):

# use custom HTTP header
headers = {'User-Agent': 'EasyBuild', 'Accept': '*/*'}

# permit additional or override headers via http_headers_fields_urlpat option
# only append/override HTTP header fields that match current url
for urlpatkey, http_header_fields in urlpat_headers.items():
if re.search(urlpatkey, url):
extraheaders = dict(hf.split(':', 1) for hf in http_header_fields)
for key, val in extraheaders.items():
headers[key] = val
_log.debug('Custom HTTP header field set: %s (value omitted from log)' % (key))

# for backward compatibility, and to avoid relying on 3rd party Python library 'requests'
url_req = std_urllib.Request(url, headers=headers)
used_urllib = std_urllib
Expand Down
3 changes: 3 additions & 0 deletions easybuild/tools/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ def override_options(self):
"(e.g. --hide-deps=zlib,ncurses)", 'strlist', 'extend', None),
'hide-toolchains': ("Comma separated list of toolchains that you want automatically hidden, "
"(e.g. --hide-toolchains=GCCcore)", 'strlist', 'extend', None),
'http-header-fields-urlpat': (("Set extra HTTP header fields (or file) for URL patterns;"
"(e.g. ^https://www.example.com::/path/to/headers.txt)"),
None, 'append', None, {'metavar': 'PAT::FIELD[,[PAT::]FIELDS..]'}),
'ignore-checksums': ("Ignore failing checksum verification", None, 'store_true', False),
'ignore-osdeps': ("Ignore any listed OS dependencies", None, 'store_true', False),
'install-latest-eb-release': ("Install latest known version of easybuild", None, 'store_true', False),
Expand Down
2 changes: 1 addition & 1 deletion test/framework/build_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def test_init_logging(self):
self.assertTrue(os.path.exists(logfile))
self.assertEqual(os.path.dirname(logfile), tmpdir)
self.assertTrue(isinstance(log, EasyBuildLog))
self.assertTrue(stdout.startswith("== temporary log file in case of crash"))
self.assertTrue(stdout.startswith("== Temporary log file in case of crash"))

stop_logging(logfile)

Expand Down
142 changes: 142 additions & 0 deletions test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -2563,6 +2563,148 @@ def test_hide_toolchains(self):
self.assertTrue(re.search(r'module: GCC/\.4\.9\.2', outtxt))
self.assertTrue(re.search(r'module: gzip/1\.6-GCC-4\.9\.2', outtxt))

def test_http_header_fields_urlpat(self):
    """Test use of --http-header-fields-urlpat."""
    test_ecs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'easyconfigs', 'test_ecs')
    ec_file = os.path.join(test_ecs_dir, 'g', 'gzip', 'gzip-1.6-GCC-4.9.2.eb')
    common_args = [
        ec_file,
        '--stop=fetch',
        '--debug',
        '--force',
        '--force-download',
        '--logtostdout',
    ]

    # define header fields:values that should (not) show up in the logs, either
    # because they are secret or because they are not matched for the url
    testdohdr = 'HeaderAPPLIED'
    testdoval = 'SECRETvalue'
    testdonthdr = 'HeaderIGNORED'
    testdontval = 'BOGUSvalue'

    # header fields (or its values) could be files to be read instead of literals
    testcmdfile = os.path.join(self.test_prefix, 'testhttpheaderscmdline.txt')
    testincfile = os.path.join(self.test_prefix, 'testhttpheadersvalinc.txt')
    testexcfile = os.path.join(self.test_prefix, 'testhttpheadersvalexc.txt')
    testinchdrfile = os.path.join(self.test_prefix, 'testhttpheadershdrinc.txt')
    testexchdrfile = os.path.join(self.test_prefix, 'testhttpheadershdrexc.txt')
    testurlpatfile = os.path.join(self.test_prefix, 'testhttpheadersurlpat.txt')

    # log mention format upon header or file inclusion
    mentionhdr = 'Custom HTTP header field set: %s'
    mentionfile = 'File included in parse_http_header_fields_urlpat: %s'

    def run_and_assert(args, msg, words_expected=None, words_unexpected=None):
        """Run EasyBuild with given args; check which patterns do (not) appear in the log."""
        stdout, stderr = self._run_mock_eb(args, do_build=True, raise_error=True, testing=False)
        if words_expected is not None:
            for thestring in words_expected:
                self.assertTrue(re.compile(thestring).search(stdout), "Pattern '%s' missing from log (%s)" %
                                (thestring, msg))
        if words_unexpected is not None:
            for thestring in words_unexpected:
                self.assertFalse(re.compile(thestring).search(stdout), "Pattern '%s' leaked into log (%s)" %
                                 (thestring, msg))

    # A: simple direct case (all is logged)
    args = list(common_args)
    args.extend([
        '--http-header-fields-urlpat=gnu.org::%s:%s' % (testdohdr, testdoval),
        '--http-header-fields-urlpat=nomatch.com::%s:%s' % (testdonthdr, testdontval),
    ])
    # expect to find everything passed on cmdline
    run_and_assert(
        args,
        'case A',
        [mentionhdr % (testdohdr), testdoval, testdonthdr, testdontval]
    )

    # all subsequent tests share this argument list
    # copy common_args so the append below does not mutate the shared list
    args = list(common_args)
    args.append('--http-header-fields-urlpat=%s' % (testcmdfile))

    # B: simple file case (secrets in file are not logged)
    write_file(
        testcmdfile,
        '\n'.join(
            [
                'gnu.org::%s: %s' % (testdohdr, testdoval),
                'nomatch.com::%s: %s' % (testdonthdr, testdontval),
                '',
            ]
        ),
    )
    # expect to find only the header key (not its value) and only for the appropriate url
    run_and_assert(
        args,
        'case B',
        [mentionhdr % (testdohdr), mentionfile % (testcmdfile)],
        [testdoval, testdonthdr, testdontval],
    )

    # C: recursion one: header value is another file
    write_file(
        testcmdfile,
        '\n'.join(
            [
                'gnu.org::%s: %s' % (testdohdr, testincfile),
                'nomatch.com::%s: %s' % (testdonthdr, testexcfile),
                '',
            ]
        ),
    )
    write_file(testincfile, '%s\n' % (testdoval))
    write_file(testexcfile, '%s\n' % (testdontval))
    # expect to find only the header key (not its value and not the filename) and only for the appropriate url
    run_and_assert(
        args,
        'case C',
        [
            mentionhdr % (testdohdr),
            mentionfile % (testcmdfile),
            mentionfile % (testincfile),
            mentionfile % (testexcfile),
        ],
        [testdoval, testdonthdr, testdontval],
    )

    # D: recursion two: header field+value is another file
    write_file(testcmdfile, '\n'.join(['gnu.org::%s' % (testinchdrfile), 'nomatch.com::%s' % (testexchdrfile), '']))
    write_file(testinchdrfile, '%s: %s\n' % (testdohdr, testdoval))
    write_file(testexchdrfile, '%s: %s\n' % (testdonthdr, testdontval))
    # expect to find only the header key (and the literal filename) and only for the appropriate url
    run_and_assert(
        args,
        'case D',
        [
            mentionhdr % (testdohdr),
            mentionfile % (testcmdfile),
            mentionfile % (testinchdrfile),
            mentionfile % (testexchdrfile),
        ],
        [testdoval, testdonthdr, testdontval],
    )

    # E: recursion three: url pattern + header field + value in another file
    write_file(testcmdfile, '%s\n' % (testurlpatfile))
    write_file(
        testurlpatfile,
        '\n'.join(
            [
                'gnu.org::%s: %s' % (testdohdr, testdoval),
                'nomatch.com::%s: %s' % (testdonthdr, testdontval),
                '',
            ]
        ),
    )
    # expect to find only the header key (but not the literal filename) and only for the appropriate url
    run_and_assert(
        args,
        'case E',
        [mentionhdr % (testdohdr), mentionfile % (testcmdfile), mentionfile % (testurlpatfile)],
        [testdoval, testdonthdr, testdontval],
    )

def test_test_report_env_filter(self):
"""Test use of --test-report-env-filter."""

Expand Down