mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 03:32:05 +00:00
[vimeo] fix album extraction
closes #1933 closes #15704 closes #15855 closes #18967 closes #21986
This commit is contained in:
parent
5efbc1366f
commit
eb9c9c74a6
1 changed files with 42 additions and 16 deletions
|
@ -2,12 +2,14 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import functools
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_kwargs,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
@ -19,6 +21,7 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
|
OnDemandPagedList,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
qualities,
|
qualities,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
|
@ -98,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||||
webpage, 'vuid', group='vuid')
|
webpage, 'vuid', group='vuid')
|
||||||
return xsrft, vuid
|
return xsrft, vuid
|
||||||
|
|
||||||
|
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
    """Locate the ``vimeo.config`` assignment in ``webpage`` and parse it.

    Extra positional and keyword arguments are forwarded to
    ``self._search_regex`` (for example ``default=None``).

    Returns the result of ``self._parse_json`` on the matched object
    literal, or ``None`` when the search produced a falsy value.
    """
    # The config is either assigned directly as an object literal or
    # wrapped as the second argument of an ``_extend(...)`` call.
    raw_config = self._search_regex(
        r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
        webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
    if not raw_config:
        return None
    return self._parse_json(raw_config, video_id)
|
||||||
|
|
||||||
def _set_vimeo_cookie(self, name, value):
    """Set cookie ``name`` to ``value`` for the ``vimeo.com`` domain."""
    cookie_domain = 'vimeo.com'
    self._set_cookie(cookie_domain, name, value)
|
||||||
|
|
||||||
|
@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
\.
|
\.
|
||||||
)?
|
)?
|
||||||
vimeo(?P<pro>pro)?\.com/
|
vimeo(?P<pro>pro)?\.com/
|
||||||
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||||
(?:.*?/)?
|
(?:.*?/)?
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
|
@ -580,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
# and later we extract those that are Vimeo specific.
|
# and later we extract those that are Vimeo specific.
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
vimeo_config = self._search_regex(
|
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
|
||||||
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
|
|
||||||
'vimeo config', default=None)
|
|
||||||
if vimeo_config:
|
if vimeo_config:
|
||||||
seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
|
seed_status = vimeo_config.get('seed_status', {})
|
||||||
if seed_status.get('state') == 'failed':
|
if seed_status.get('state') == 'failed':
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, seed_status['title']),
|
'%s said: %s' % (self.IE_NAME, seed_status['title']),
|
||||||
|
@ -905,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE):
|
||||||
|
|
||||||
class VimeoAlbumIE(VimeoChannelIE):
|
class VimeoAlbumIE(VimeoChannelIE):
|
||||||
IE_NAME = 'vimeo:album'
|
IE_NAME = 'vimeo:album'
|
||||||
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
||||||
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vimeo.com/album/2632481',
|
'url': 'https://vimeo.com/album/2632481',
|
||||||
|
@ -925,21 +933,39 @@ class VimeoAlbumIE(VimeoChannelIE):
|
||||||
'params': {
|
'params': {
|
||||||
'videopassword': 'youtube-dl',
|
'videopassword': 'youtube-dl',
|
||||||
}
|
}
|
||||||
}, {
|
|
||||||
'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
# TODO: respect page number
|
|
||||||
'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
def _page_url(self, base_url, pagenum):
|
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
    """Yield a ``url_result`` for every linked video on one album page.

    Requests ``https://api.vimeo.com/albums/<album_id>/videos`` with
    ``per_page`` set to ``self._PAGE_SIZE`` and the API page number
    ``page + 1``.  ``authorizaion`` (spelling kept from the existing
    signature) is sent as a ``jwt`` Authorization header; ``hashed_pass``
    is added to the query only when truthy.
    """
    page_number = page + 1
    params = {
        'fields': 'link',
        'page': page_number,
        'per_page': self._PAGE_SIZE,
    }
    if hashed_pass:
        params['_hashed_pass'] = hashed_pass
    request_headers = {'Authorization': 'jwt ' + authorizaion}
    response = self._download_json(
        'https://api.vimeo.com/albums/%s/videos' % album_id,
        album_id, 'Downloading page %d' % page_number,
        query=params, headers=request_headers)
    for item in response['data']:
        video_url = item.get('link')
        # Entries without a link cannot be delegated to VimeoIE; skip them.
        if video_url:
            yield self.url_result(
                video_url, VimeoIE.ie_key(), VimeoIE._match_id(video_url))
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Build a lazily paged playlist for the album identified by ``url``.

    Downloads the album page, passes it through
    ``self._login_list_password`` (defined outside this view; presumably
    handles password-protected albums), reads the ``api`` section of the
    page's vimeo config, and wraps ``self._fetch_page`` in an
    ``OnDemandPagedList`` with ``self._PAGE_SIZE`` entries per page.
    The playlist title is taken from the page's ``<title>`` element and
    is non-fatal if missing.
    """
    album_id = self._match_id(url)
    page_html = self._login_list_password(
        url, album_id, self._download_webpage(url, album_id))
    api = self._extract_vimeo_config(page_html, album_id)['api']
    # Bind everything except the page index, which the paged list supplies.
    page_fetcher = functools.partial(
        self._fetch_page, album_id, api['jwt'], api.get('hashed_pass'))
    entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
    title = self._html_search_regex(
        r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', page_html, 'title',
        fatal=False)
    return self.playlist_result(entries, album_id, title)
|
||||||
|
|
||||||
|
|
||||||
class VimeoGroupsIE(VimeoAlbumIE):
|
class VimeoGroupsIE(VimeoAlbumIE):
|
||||||
|
|
Loading…
Reference in a new issue