mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-10-31 14:45:26 +00:00
[youtube] Fix ytsearch* when cookies are provided
Closes #11924. The API with the `page` parameter is no longer used in browsers, and YouTube always returns {'reload': 'now'} when cookies are provided. See http://youtube.github.io/spfjs/documentation/start/ for how SPF works. Basically, appending an `spf` parameter to a static link yields the corresponding dynamic link.
This commit is contained in:
parent
c54c01f82d
commit
a22b2fd19b
2 changed files with 15 additions and 8 deletions
|
@ -1,6 +1,7 @@
|
|||
version <unreleased>
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix ytsearch when cookies are provided (#11924)
|
||||
+ [bilibili] Support new Bangumi URLs (#11845)
|
||||
|
||||
version 2017.02.01
|
||||
|
|
|
@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
|
|||
videos = []
|
||||
limit = n
|
||||
|
||||
url_query = {
|
||||
'search_query': query.encode('utf-8'),
|
||||
}
|
||||
url_query.update(self._EXTRA_QUERY_ARGS)
|
||||
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
url_query = {
|
||||
'search_query': query.encode('utf-8'),
|
||||
'page': pagenum,
|
||||
'spf': 'navigate',
|
||||
}
|
||||
url_query.update(self._EXTRA_QUERY_ARGS)
|
||||
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
|
||||
data = self._download_json(
|
||||
result_url, video_id='query "%s"' % query,
|
||||
note='Downloading page %s' % pagenum,
|
||||
errnote='Unable to download API page')
|
||||
errnote='Unable to download API page',
|
||||
query={'spf': 'navigate'})
|
||||
html_content = data[1]['body']['content']
|
||||
|
||||
if 'class="search-message' in html_content:
|
||||
|
@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
|
|||
videos += new_videos
|
||||
if not new_videos or len(videos) > limit:
|
||||
break
|
||||
next_link = self._html_search_regex(
|
||||
r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
|
||||
html_content, 'next link', default=None)
|
||||
if next_link is None:
|
||||
break
|
||||
result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
|
||||
|
||||
if len(videos) > n:
|
||||
videos = videos[:n]
|
||||
|
|
Loading…
Reference in a new issue