Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@
- **CTV**
- **CTVNews**
- **cu.ntv.co.jp**: Nippon Television Network
- **Culturebox**
- **CultureUnplugged**
- **curiositystream**
- **curiositystream:collection**
Expand Down Expand Up @@ -307,13 +306,10 @@
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
- **FoxSports**
- **france2.fr:generation-what**
- **FranceCulture**
- **FranceInter**
- **FranceTV**
- **FranceTVEmbed**
- **francetvinfo.fr**
- **FranceTVJeunesse**
- **FranceTVSite**
- **Freesound**
- **freespeech.org**
Expand Down Expand Up @@ -472,8 +468,6 @@
- **LinuxAcademy**
- **LiTV**
- **LiveJournal**
- **LiveLeak**
- **LiveLeakEmbed**
- **livestream**
- **livestream:original**
- **LnkGo**
Expand Down Expand Up @@ -877,7 +871,6 @@
- **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **sport.francetvinfo.fr**
- **Sport5**
- **SportBox**
- **SportDeutschland**
Expand Down
2 changes: 1 addition & 1 deletion test/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def expect_info_dict(self, got_dict, expected_dict):
expect_dict(self, got_dict, expected_dict)
# Check for the presence of mandatory fields
if got_dict.get('_type') not in ('playlist', 'multi_video'):
for key in ('id', 'url', 'title', 'ext'):
for key in ('id', 'webpage_url', 'title', 'ext'):
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
Expand Down
15 changes: 9 additions & 6 deletions youtube_dl/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
write_string,
YoutubeDLCookieJar,
YoutubeDLCookieProcessor,
YoutubeDLError,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
)
Expand Down Expand Up @@ -1862,7 +1863,6 @@ def ensure_dir_exists(path):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
Expand All @@ -1880,12 +1880,15 @@ def ensure_dir_exists(path):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
else:
fd = get_suitable_downloader(sub_info, self.params)(self, self.params)
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
if self.params.get('verbose'):
self.to_screen('[debug] Invoking subtitle downloader on %r' % sub_info.get('url'))
# The FD is supposed to encodeFilename()
if not fd.download(sub_filename, sub_info):
# depending on the FD, it may catch errors and return False, or not
raise YoutubeDLError('Subtitle download failed')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error, OSError, IOError, YoutubeDLError) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
Expand Down
25 changes: 19 additions & 6 deletions youtube_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,10 @@ class InfoExtractor(object):
preference, each element is a dictionary with the "ext"
entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
* "url": A URL pointing to the subtitles resource
With "url", a "protocol" entry (as for "formats" above)
may be provided to indicate how the URL should be
processed; by default it is a file downloaded by HTTP(S)
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
Expand Down Expand Up @@ -1635,7 +1638,7 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None,
fatal=True, live=False, data=None, headers={},
query={}):
query={}, include_subtitles=False):
res = self._download_webpage_handle(
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
Expand All @@ -1650,18 +1653,19 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,

return self._parse_m3u8_formats(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
preference=preference, m3u8_id=m3u8_id, live=live)
preference=preference, m3u8_id=m3u8_id, live=live, include_subtitles=include_subtitles)

def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
entry_protocol='m3u8', preference=None,
m3u8_id=None, live=False):
m3u8_id=None, live=False, include_subtitles=False):
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
return []

if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
return []

formats = []
subtitles = {}

format_url = lambda u: (
u
Expand Down Expand Up @@ -1696,13 +1700,20 @@ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
groups = {}
last_stream_inf = {}

def extract_media(x_media_line):
def extract_media(x_media_line, include_subtitles=False):
media = parse_m3u8_attributes(x_media_line)
# As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
if not (media_type and group_id and name):
return
groups.setdefault(group_id, []).append(media)
if include_subtitles and (media_type == 'SUBTITLES'):
subtitles[media['LANGUAGE']] = [{
'url': format_url(media['URI']),
'ext': media.get('SUBFORMAT', 'webtt'),
'protocol': 'm3u8_native',
}]
return
if media_type not in ('VIDEO', 'AUDIO'):
return
media_url = media.get('URI')
Expand Down Expand Up @@ -1748,7 +1759,7 @@ def build_stream_name():
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-MEDIA:'):
extract_media(line)
extract_media(line, include_subtitles=include_subtitles)

for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-STREAM-INF:'):
Expand Down Expand Up @@ -1828,6 +1839,8 @@ def build_stream_name():
formats.append(http_f)

last_stream_inf = {}
if include_subtitles:
return formats, subtitles
return formats

@staticmethod
Expand Down
5 changes: 0 additions & 5 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,12 +401,7 @@
from .francetv import (
FranceTVIE,
FranceTVSiteIE,
FranceTVEmbedIE,
FranceTVInfoIE,
FranceTVInfoSportIE,
FranceTVJeunesseIE,
GenerationWhatIE,
CultureboxIE,
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
Expand Down
Loading