diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py
index 207e80b815..a676c01bd1 100644
--- a/easybuild/tools/filetools.py
+++ b/easybuild/tools/filetools.py
@@ -54,14 +54,13 @@
 import tempfile
 import time
 import zlib
-from xml.etree import ElementTree
 
 from easybuild.base import fancylogger
 from easybuild.tools import run
 # import build_log must stay, to use of EasyBuildLog
 from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg, print_warning
 from easybuild.tools.config import DEFAULT_WAIT_ON_LOCK_INTERVAL, GENERIC_EASYBLOCK_PKG, build_option, install_path
-from easybuild.tools.py2vs3 import std_urllib, string_type
+from easybuild.tools.py2vs3 import HTMLParser, std_urllib, string_type
 from easybuild.tools.utilities import nub, remove_unwanted_chars
 
 try:
@@ -519,15 +518,21 @@ def pypi_source_urls(pkg_name):
     else:
         urls_txt = read_file(urls_html)
 
-        # ignore yanked releases (see https://pypi.org/help/#yanked)
-        # see https://github.com/easybuilders/easybuild-framework/issues/3301
-        urls_txt = re.sub(r'', '', urls_txt)
+        res = []
 
-        parsed_html = ElementTree.ElementTree(ElementTree.fromstring(urls_txt))
-        if hasattr(parsed_html, 'iter'):
-            res = [a.attrib['href'] for a in parsed_html.iter('a')]
-        else:
-            res = [a.attrib['href'] for a in parsed_html.getiterator('a')]
+        # note: don't use xml.etree.ElementTree to parse HTML page served by PyPI's simple API
+        # cfr. https://github.com/pypa/warehouse/issues/7886; NOTE(review): yanked links are not skipped - confirm
+        class HrefHTMLParser(HTMLParser):
+            """HTML parser to extract 'href' attribute values from anchor tags (<a>)."""
+
+            def handle_starttag(self, tag, attrs):
+                if tag == 'a':
+                    attrs = dict(attrs)
+                    if 'href' in attrs:
+                        res.append(attrs['href'])
+
+        parser = HrefHTMLParser()
+        parser.feed(urls_txt)
 
     # links are relative, transform them into full URLs; for example:
     # from: ../../packages////easybuild-.tar.gz#md5=
diff --git a/easybuild/tools/py2vs3/py2.py b/easybuild/tools/py2vs3/py2.py
index 7dbd9161dc..e67583fcae 100644
--- a/easybuild/tools/py2vs3/py2.py
+++ b/easybuild/tools/py2vs3/py2.py
@@ -34,6 +34,7 @@
 import json
 import subprocess
 import urllib2 as std_urllib  # noqa
+from HTMLParser import HTMLParser  # noqa
 from string import letters as ascii_letters  # noqa
 from string import lowercase as ascii_lowercase  # noqa
 from StringIO import StringIO  # noqa
diff --git a/easybuild/tools/py2vs3/py3.py b/easybuild/tools/py2vs3/py3.py
index 3ff66d6802..55344dea12 100644
--- a/easybuild/tools/py2vs3/py3.py
+++ b/easybuild/tools/py2vs3/py3.py
@@ -38,6 +38,7 @@
 from collections import OrderedDict  # noqa
 from distutils.version import LooseVersion
 from functools import cmp_to_key
+from html.parser import HTMLParser  # noqa
 from itertools import zip_longest
 from io import StringIO  # noqa
 from string import ascii_letters, ascii_lowercase  # noqa