mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 11:41:52 +00:00
[webofstories] Tolerate malformed og:title (Closes #8417)
This commit is contained in:
parent
7a0e7779fe
commit
8870bb4653
1 changed file with 39 additions and 25 deletions
|
@ -12,8 +12,7 @@ class WebOfStoriesIE(InfoExtractor):
|
|||
_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
|
||||
_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
|
||||
_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
|
||||
_TESTS = [
|
||||
{
|
||||
_TESTS = [{
|
||||
'url': 'http://www.webofstories.com/play/hans.bethe/71',
|
||||
'md5': '373e4dd915f60cfe3116322642ddf364',
|
||||
'info_dict': {
|
||||
|
@ -24,8 +23,7 @@ class WebOfStoriesIE(InfoExtractor):
|
|||
'description': 'Hans Bethe talks about calculating the temperature of the sun',
|
||||
'duration': 238,
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.webofstories.com/play/55908',
|
||||
'md5': '2985a698e1fe3211022422c4b5ed962c',
|
||||
'info_dict': {
|
||||
|
@ -35,15 +33,31 @@ class WebOfStoriesIE(InfoExtractor):
|
|||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
|
||||
'duration': 169,
|
||||
}
|
||||
},
|
||||
]
|
||||
'skip': 'notfound',
|
||||
}, {
|
||||
# malformed og:title meta
|
||||
'url': 'http://www.webofstories.com/play/54215?o=MS',
|
||||
'info_dict': {
|
||||
'id': '54215',
|
||||
'ext': 'mp4',
|
||||
'title': '"A Leg to Stand On"',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'Oliver Sacks talks about the death and resurrection of a limb',
|
||||
'duration': 97,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
# Sometimes og:title meta is malformed
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
|
|
Loading…
Reference in a new issue