-
Notifications
You must be signed in to change notification settings - Fork 10.5k
[FranceTV] Back-port and update extractors #33131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
* FranceTVIE: updated
* FranceTVSiteIE: updated
* FranceTVInfoIE: updated, add la1ere, use franceinfo.fr
* FranceTVInfoSportIE: legacy redirect extractor
* FranceTVJeunesseIE: obsolete, not `_WORKING`
* GenerationWhatIE: obsolete, not `_WORKING`
* CultureboxIE: legacy redirect extractor
|
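For posterity and searchability ... In some early Pythons, the regular expression compiler might raise an error when compiling this pattern (the exact error message is missing from this capture). In that case, change the pattern as follows:
_VALID_URL = r'''(?x)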
https?://embed\.francetv\.fr(?:/?\?(?:.*&)?(?P<ue>ue)=|/)
- (?P<id>[\da-f]{32})(?:(?(ue)&|/?[?#]).*)?$
+ (?P<id>[\da-f]{32})(?:|(?(ue)&|/?[?#]).*)$
''' |
| if identity and isinstance(identity, str): | ||
| entries.append(self._make_url_result(identity)) | ||
|
|
||
| return self.playlist_result(entries, playlist_id) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Roll in Lumni.
| return self.playlist_result(entries, playlist_id) | |
| return self.playlist_result(entries, playlist_id) | |
| class LumniBaseIE(FranceTVBaseIE): | |
| _VIDEO_DIV_RE = r'(<div\s[^>]*(?<!-)\bdata-factoryid\s*=\s*(?!""|\'\')\S[^>]*>)' | |
| class LumniIE(LumniBaseIE): | |
| _VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)' | |
| _TESTS = [{ | |
| 'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle', | |
| 'add_ie': [FranceTVIE.ie_key()], | |
| 'md5': '84ddbef8fede632091f201eda38192d9', | |
| 'info_dict': { | |
| 'id': 'd2b9a4e5-a526-495b-866c-ab72737e3645', | |
| 'ext': 'mp4', | |
| 'title': "L'homme et son environnement dans la révolution industrielle - L'ère de l'homme", | |
| 'timestamp': 1725372585, | |
| 'upload_date': '20240903', | |
| 'thumbnail': r're:https?:/(?:/[\w.-]+)+\.jpg$', | |
| 'duration': 230, | |
| }, | |
| 'params': { | |
| 'format': 'best/bestvideo', | |
| 'skip_download': 'm3u8', | |
| }, | |
| }] | |
| def _real_extract(self, url): | |
| display_id = self._match_id(url) | |
| webpage = self._download_webpage(url, display_id) | |
| video_id = extract_attributes(self._search_regex( | |
| self._VIDEO_DIV_RE, webpage, 'video id'))['data-factoryid'] | |
| return self._make_url_result(video_id, url=url) | |
| class LumniPlaylistIE(LumniBaseIE): | |
| _VALID_URL = r'''(?x) | |
| https?:// | |
| (?:www\.)?lumni\.fr/ | |
| (?:dossier|programme|serie)/ | |
| (?P<id>[0-9a-z-]+) | |
| ''' | |
| _TESTS = [{ | |
| 'url': 'https://www.lumni.fr/dossier/les-fondamentaux-vocabulaire', | |
| 'add_ie': [FranceTVIE.ie_key()], | |
| 'info_dict': { | |
| 'id': 'les-fondamentaux-vocabulaire', | |
| 'title': 're:(?i)^Les Fondamentaux : Vocabulaire$', | |
| }, | |
| 'playlist_mincount': 9 | |
| }, { | |
| 'url': 'https://www.lumni.fr/programme/the-rich-morning-show', | |
| 'only_matching': True | |
| }, { | |
| 'url': 'https://www.lumni.fr/serie/la-maison-lumni-college', | |
| 'only_matching': True | |
| }] | |
| def _real_extract(self, url): | |
| playlist_id = self._match_id(url) | |
| webpage = self._download_webpage(url, playlist_id) | |
| playlist_title = self._og_search_title(webpage) | |
| def entries(): | |
| for m in re.finditer(self._VIDEO_DIV_RE, webpage): | |
| video_id = extract_attributes(m.group(1)).get('data-factoryid') | |
| if video_id: | |
| yield self._make_url_result(video_id, url=url) | |
| return self.playlist_result( | |
| entries(), playlist_id, playlist_title) |
No.
The suggestions cover known best practices and/or optimisations that are or would be implemented if appropriate and worthwhile. Item 4 is just wrong to identify |
| # format_field, | ||
| HEADRequest, | ||
| int_or_none, | ||
| join_nonempty, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Support video ID in SvelteKit JS - pt 2:
| join_nonempty, | |
| join_nonempty, | |
| js_to_json, |
| player_button = extract_attributes(self._search_regex( | ||
| r'(<button\s[^>]*(?<!-)\bdata-url\s*=\s*["\'][^>]{2,}>)', | ||
| webpage, 'fi player button', default='')) | ||
| if player_button.get('data-url'): | ||
| result = merge_dicts(traverse_obj(player_button, { | ||
| 'id': 'data-expression-uuid', | ||
| 'timestamp': ('data-start-time', T(int_or_none)), | ||
| 'duration': T(lambda x: int(x['data-end-time']) - int(x['data-start-time'])), | ||
| 'title': 'data-extract-title', | ||
| 'description': 'data-diffusion-title', | ||
| 'series': 'data-emission-title', | ||
| 'url': ('data-url', T(url_or_none)), | ||
| }), { | ||
| 'thumbnail': self._og_search_thumbnail(webpage, default=None), | ||
| }) | ||
| if result.get('url'): | ||
| if not result.get('title'): | ||
| result['title'] = self._html_search_regex( | ||
| r'''Retrouvez l'intégralité du\s+(.+)\s*:''', | ||
| webpage, 'Alt title').replace('"', '') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Support video ID in SvelteKit JS - pt 1:
| player_button = extract_attributes(self._search_regex( | |
| r'(<button\s[^>]*(?<!-)\bdata-url\s*=\s*["\'][^>]{2,}>)', | |
| webpage, 'fi player button', default='')) | |
| if player_button.get('data-url'): | |
| result = merge_dicts(traverse_obj(player_button, { | |
| 'id': 'data-expression-uuid', | |
| 'timestamp': ('data-start-time', T(int_or_none)), | |
| 'duration': T(lambda x: int(x['data-end-time']) - int(x['data-start-time'])), | |
| 'title': 'data-extract-title', | |
| 'description': 'data-diffusion-title', | |
| 'series': 'data-emission-title', | |
| 'url': ('data-url', T(url_or_none)), | |
| }), { | |
| 'thumbnail': self._og_search_thumbnail(webpage, default=None), | |
| }) | |
| if result.get('url'): | |
| if not result.get('title'): | |
| result['title'] = self._html_search_regex( | |
| r'''Retrouvez l'intégralité du\s+(.+)\s*:''', | |
| webpage, 'Alt title').replace('"', '') | |
| svelte_js_content = self._search_json( | |
| r'\bcontent\s*:', webpage, 'svelte content', display_id, | |
| transform_source=js_to_json, default={}) | |
| result['id'] = traverse_obj(svelte_js_content, ('diffusion', 'videoId', T(str))) | |
| if not result.get('id'): | |
| player_button = extract_attributes(self._search_regex( | |
| r'(<button\s[^>]*(?<!-)\bdata-url\s*=\s*["\'][^>]{2,}>)', | |
| webpage, 'fi player button', default='')) | |
| if player_button.get('data-url'): | |
| result = merge_dicts(traverse_obj(player_button, { | |
| 'id': 'data-expression-uuid', | |
| 'timestamp': ('data-start-time', T(int_or_none)), | |
| 'duration': T(lambda x: int(x['data-end-time']) - int(x['data-start-time'])), | |
| 'title': 'data-extract-title', | |
| 'description': 'data-diffusion-title', | |
| 'series': 'data-emission-title', | |
| 'url': ('data-url', T(url_or_none)), | |
| }), { | |
| 'thumbnail': self._og_search_thumbnail(webpage, default=None), | |
| }) | |
| if result.get('url'): | |
| if not result.get('title'): | |
| result['title'] = self._html_search_regex( | |
| r'''Retrouvez l'intégralité du\s+(.+)\s*:''', | |
| webpage, 'Alt title').replace('"', '') |
Boilerplate: yt-dlp+own code, bug-fix+improvement
Please follow the guide below
Put an `x` into all the boxes [ ] relevant to your pull request (like that [x]).
Before submitting a pull request make sure you have:
In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under Unlicense. Check one of the following options:
What is the purpose of your pull request?
Description of your pull request and other information
`_WORKING` `_WORKING`
Supersedes, closes #29996
Supersedes, closes #32884
Resolves #25996
Resolves #29956
Resolves #32713