ytdl-org · sarnoud · Sep 19, 2021 · Sep 20, 2021 · Sep 20, 2021 · Sep 20, 2021
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
@@ -210,7 +210,6 @@
  - **CTV**
  - **CTVNews**
  - **cu.ntv.co.jp**: Nippon Television Network
- - **Culturebox**
  - **CultureUnplugged**
  - **curiositystream**
  - **curiositystream:collection**
@@ -307,13 +306,10 @@
  - **foxnews**: Fox News and Fox Business Video
  - **foxnews:article**
  - **FoxSports**
- - **france2.fr:generation-what**
  - **FranceCulture**
  - **FranceInter**
  - **FranceTV**
- - **FranceTVEmbed**
  - **francetvinfo.fr**
- - **FranceTVJeunesse**
  - **FranceTVSite**
  - **Freesound**
  - **freespeech.org**
@@ -472,8 +468,6 @@
  - **LinuxAcademy**
  - **LiTV**
  - **LiveJournal**
- - **LiveLeak**
- - **LiveLeakEmbed**
  - **livestream**
  - **livestream:original**
  - **LnkGo**
@@ -877,7 +871,6 @@
  - **SpankBangPlaylist**
  - **Spankwire**
  - **Spiegel**
- - **sport.francetvinfo.fr**
  - **Sport5**
  - **SportBox**
  - **SportDeutschland**

diff --git a/test/helper.py b/test/helper.py
@@ -190,7 +190,7 @@ def expect_info_dict(self, got_dict, expected_dict):
     expect_dict(self, got_dict, expected_dict)
     # Check for the presence of mandatory fields
     if got_dict.get('_type') not in ('playlist', 'multi_video'):
-        for key in ('id', 'url', 'title', 'ext'):
+        for key in ('id', 'webpage_url', 'title', 'ext'):
             self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
     # Check for mandatory fields that are automatically set by YoutubeDL
     for key in ['webpage_url', 'extractor', 'extractor_key']:

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
@@ -91,6 +91,7 @@
     write_string,
     YoutubeDLCookieJar,
     YoutubeDLCookieProcessor,
+    YoutubeDLError,
     YoutubeDLHandler,
     YoutubeDLRedirectHandler,
 )
@@ -1862,7 +1863,6 @@ def ensure_dir_exists(path):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['requested_subtitles']
-            ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@@ -1880,12 +1880,15 @@ def ensure_dir_exists(path):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
                             return
                     else:
+                        fd = get_suitable_downloader(sub_info, self.params)(self, self.params)
                         try:
-                            sub_data = ie._request_webpage(
-                                sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
-                                subfile.write(sub_data)
-                        except (ExtractorError, IOError, OSError, ValueError) as err:
+                            if self.params.get('verbose'):
+                                self.to_screen('[debug] Invoking subtitle downloader on %r' % sub_info.get('url'))
+                            # The file downloader (FD) is expected to apply encodeFilename() itself
+                            if not fd.download(sub_filename, sub_info):
+                                # some FDs catch errors and return False; others let the exception propagate
+                                raise YoutubeDLError('Subtitle download failed')
+                        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error, OSError, IOError, YoutubeDLError) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
                                                 (sub_lang, error_to_compat_str(err)))
                             continue

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -250,7 +250,10 @@ class InfoExtractor(object):
                     preference, each element is a dictionary with the "ext"
                     entry and one of:
                         * "data": The subtitles file contents
-                        * "url": A URL pointing to the subtitles file
+                        * "url": A URL pointing to the subtitles resource
+                    With "url", a "protocol" entry (as for "formats" above)
+                    may be provided to indicate how the URL should be
+                    processed; by default it is fetched as a file over HTTP(S)
                     "ext" will be calculated from URL if missing
     automatic_captions: Like 'subtitles', used by the YoutubeIE for
                     automatically generated captions
@@ -1635,7 +1638,7 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
                               fatal=True, live=False, data=None, headers={},
-                              query={}):
+                              query={}, include_subtitles=False):
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
@@ -1650,18 +1653,19 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
 
         return self._parse_m3u8_formats(
             m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
-            preference=preference, m3u8_id=m3u8_id, live=live)
+            preference=preference, m3u8_id=m3u8_id, live=live, include_subtitles=include_subtitles)
 
     def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
                             entry_protocol='m3u8', preference=None,
-                            m3u8_id=None, live=False):
+                            m3u8_id=None, live=False, include_subtitles=False):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []
 
         if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
             return []
 
         formats = []
+        subtitles = {}
 
         format_url = lambda u: (
             u
@@ -1696,13 +1700,20 @@ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
         groups = {}
         last_stream_inf = {}
 
-        def extract_media(x_media_line):
+        def extract_media(x_media_line, include_subtitles=False):
             media = parse_m3u8_attributes(x_media_line)
             # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
             media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
             if not (media_type and group_id and name):
                 return
             groups.setdefault(group_id, []).append(media)
+            if include_subtitles and (media_type == 'SUBTITLES'):
+                subtitles[media['LANGUAGE']] = [{
+                    'url': format_url(media['URI']),
+                    'ext': media.get('SUBFORMAT', 'webtt'),
+                    'protocol': 'm3u8_native',
+                }]
+                return
             if media_type not in ('VIDEO', 'AUDIO'):
                 return
             media_url = media.get('URI')
@@ -1748,7 +1759,7 @@ def build_stream_name():
         # precede EXT-X-MEDIA tags in HLS manifest such as [3].
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-MEDIA:'):
-                extract_media(line)
+                extract_media(line, include_subtitles=include_subtitles)
 
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-STREAM-INF:'):
@@ -1828,6 +1839,8 @@ def build_stream_name():
                     formats.append(http_f)
 
                 last_stream_inf = {}
+        if include_subtitles:
+            return formats, subtitles
         return formats
 
     @staticmethod

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -401,12 +401,7 @@
 from .francetv import (
     FranceTVIE,
     FranceTVSiteIE,
-    FranceTVEmbedIE,
     FranceTVInfoIE,
-    FranceTVInfoSportIE,
-    FranceTVJeunesseIE,
-    GenerationWhatIE,
-    CultureboxIE,
 )
 from .freesound import FreesoundIE
 from .freespeech import FreespeechIE