mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 19:52:11 +00:00
[bbc] Extract description and timestamp from __INITIAL_DATA__ (#28774)
This commit is contained in:
parent
9f6c03a006
commit
41920fc80e
1 changed files with 24 additions and 1 deletions
|
@ -11,6 +11,7 @@ from ..compat import (
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
@ -25,8 +26,10 @@ from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
@ -761,8 +764,17 @@ class BBCIE(BBCCoUkIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# custom redirection to www.bbc.com
|
# custom redirection to www.bbc.com
|
||||||
|
# also, video with window.__INITIAL_DATA__
|
||||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': 'p02xzws1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Pluto may have 'nitrogen glaciers'",
|
||||||
|
'description': "Pluto could have glaciers of nitrogen ice, new photographs from Nasa's New Horizons probe suggest.",
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
|
'timestamp': 1437785037,
|
||||||
|
'upload_date': '20150725',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
|
@ -1164,12 +1176,23 @@ class BBCIE(BBCCoUkIE):
|
||||||
continue
|
continue
|
||||||
formats, subtitles = self._download_media_selector(item_id)
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
item_desc = try_get(
|
||||||
|
media,
|
||||||
|
lambda x: x['summary']['blocks'][0]['model']['text'],
|
||||||
|
compat_str)
|
||||||
|
item_time = None
|
||||||
|
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
|
||||||
|
if try_get(meta, lambda x: x['label']) == 'Published':
|
||||||
|
item_time = unified_timestamp(meta.get('timestamp'))
|
||||||
|
break
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': item_id,
|
'id': item_id,
|
||||||
'title': item_title,
|
'title': item_title,
|
||||||
'thumbnail': item.get('holdingImageUrl'),
|
'thumbnail': item.get('holdingImageUrl'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'timestamp': item_time,
|
||||||
|
'description': strip_or_none(item_desc),
|
||||||
})
|
})
|
||||||
for resp in (initial_data.get('data') or {}).values():
|
for resp in (initial_data.get('data') or {}).values():
|
||||||
name = resp.get('name')
|
name = resp.get('name')
|
||||||
|
|
Loading…
Reference in a new issue