[youtube:tab] Pass innertube context and x-goog-visitor-id header along with continuation requests (closes #28702)

2025-05-16 23:52:47 +00:00 · 2021-04-09 02:09:52 +07:00 · 2021-04-09 02:09:52 +07:00 · 1b0a13f33c
commit 1b0a13f33c
parent 27e5a4464d
1 changed files with 27 additions and 15 deletions
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -306,7 +306,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
-                default='{}'), video_id, fatal=False)
+                default='{}'), video_id, fatal=False) or {}

    def _extract_video(self, renderer):
        video_id = renderer['videoId']
@ -2475,7 +2475,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            ctp = continuation_ep.get('clickTrackingParams')
            return YoutubeTabIE._build_continuation_query(continuation, ctp)

-    def _entries(self, tab, identity_token):
+    def _entries(self, tab, item_id, webpage):
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
@ -2535,26 +2535,37 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                yield entry
            continuation = self._extract_continuation(rich_grid_renderer)

+        ytcfg = self._extract_ytcfg(item_id, webpage)
+        client_version = try_get(
+            ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'
+
        headers = {
            'x-youtube-client-name': '1',
-            'x-youtube-client-version': '2.20201112.04.01',
+            'x-youtube-client-version': client_version,
            'content-type': 'application/json',
        }
+
+        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
+            'client': {
+                'clientName': 'WEB',
+                'clientVersion': client_version,
+            }
+        }
+        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
+
+        identity_token = self._extract_identity_token(ytcfg, webpage)
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        data = {
-            'context': {
-                'client': {
-                    'clientName': 'WEB',
-                    'clientVersion': '2.20201021.03.00',
-                }
-            },
+            'context': context,
        }

        for page_num in itertools.count(1):
            if not continuation:
                break
+            if visitor_data:
+                headers['x-goog-visitor-id'] = visitor_data
            data['continuation'] = continuation['continuation']
            data['clickTracking'] = {
                'clickTrackingParams': continuation['itct']
@ -2579,6 +2590,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            if not response:
                break

+            visitor_data = try_get(
+                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
+
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
@ -2687,7 +2701,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                alerts.append(text)
        return '\n'.join(alerts)

-    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
+    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
@ -2712,7 +2726,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                if renderer:
                    title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
        playlist = self.playlist_result(
-            self._entries(selected_tab, identity_token),
+            self._entries(selected_tab, item_id, webpage),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
@ -2736,8 +2750,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)

-    def _extract_identity_token(self, webpage, item_id):
-        ytcfg = self._extract_ytcfg(item_id, webpage)
+    def _extract_identity_token(self, ytcfg, webpage):
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
@ -2760,12 +2773,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
-        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
-            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
+            return self._extract_from_tabs(item_id, webpage, data, tabs)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist: