mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 03:32:05 +00:00
[youtube:tab] Improve grid extraction (closes #28725)
This commit is contained in:
parent
ea87ed8394
commit
7c52395479
1 changed file with 21 additions and 17 deletions
|
@@ -2320,10 +2320,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_grid_item_renderer(item):
|
def _extract_grid_item_renderer(item):
|
||||||
for item_kind in ('Playlist', 'Video', 'Channel', 'Show'):
|
assert isinstance(item, dict)
|
||||||
renderer = item.get('grid%sRenderer' % item_kind)
|
for key, renderer in item.items():
|
||||||
if renderer:
|
if not key.startswith('grid') or not key.endswith('Renderer'):
|
||||||
return renderer
|
continue
|
||||||
|
if not isinstance(renderer, dict):
|
||||||
|
continue
|
||||||
|
return renderer
|
||||||
|
|
||||||
def _grid_entries(self, grid_renderer):
|
def _grid_entries(self, grid_renderer):
|
||||||
for item in grid_renderer['items']:
|
for item in grid_renderer['items']:
|
||||||
|
@@ -2333,7 +2336,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
if not isinstance(renderer, dict):
|
if not isinstance(renderer, dict):
|
||||||
continue
|
continue
|
||||||
title = try_get(
|
title = try_get(
|
||||||
renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
|
renderer, (lambda x: x['title']['runs'][0]['text'],
|
||||||
|
lambda x: x['title']['simpleText']), compat_str)
|
||||||
# playlist
|
# playlist
|
||||||
playlist_id = renderer.get('playlistId')
|
playlist_id = renderer.get('playlistId')
|
||||||
if playlist_id:
|
if playlist_id:
|
||||||
|
@@ -2341,10 +2345,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
'https://www.youtube.com/playlist?list=%s' % playlist_id,
|
'https://www.youtube.com/playlist?list=%s' % playlist_id,
|
||||||
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||||
video_title=title)
|
video_title=title)
|
||||||
|
continue
|
||||||
# video
|
# video
|
||||||
video_id = renderer.get('videoId')
|
video_id = renderer.get('videoId')
|
||||||
if video_id:
|
if video_id:
|
||||||
yield self._extract_video(renderer)
|
yield self._extract_video(renderer)
|
||||||
|
continue
|
||||||
# channel
|
# channel
|
||||||
channel_id = renderer.get('channelId')
|
channel_id = renderer.get('channelId')
|
||||||
if channel_id:
|
if channel_id:
|
||||||
|
@@ -2353,19 +2359,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
'https://www.youtube.com/channel/%s' % channel_id,
|
'https://www.youtube.com/channel/%s' % channel_id,
|
||||||
ie=YoutubeTabIE.ie_key(), video_title=title)
|
ie=YoutubeTabIE.ie_key(), video_title=title)
|
||||||
# show
|
continue
|
||||||
if playlist_id is None: # needs to check for playlist_id, or non-series playlists are recognized twice
|
# generic endpoint URL support
|
||||||
show_playlist_url = try_get(
|
ep_url = urljoin('https://www.youtube.com/', try_get(
|
||||||
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
|
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
|
||||||
compat_str)
|
compat_str))
|
||||||
if show_playlist_url:
|
if ep_url:
|
||||||
playlist_id = self._search_regex(r'/playlist\?list=([0-9a-zA-Z-_]+)', show_playlist_url,
|
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
|
||||||
'playlist id', default=None)
|
if ie.suitable(ep_url):
|
||||||
if playlist_id:
|
|
||||||
title = try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
|
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
"https://www.youtube.com/playlist?list=%s" % playlist_id,
|
ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
|
||||||
ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)
|
break
|
||||||
|
|
||||||
def _shelf_entries_from_content(self, shelf_renderer):
|
def _shelf_entries_from_content(self, shelf_renderer):
|
||||||
content = shelf_renderer.get('content')
|
content = shelf_renderer.get('content')
|
||||||
|
|
Loading…
Reference in a new issue