mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-22 02:01:50 +00:00
[extractor/ted] fix subtitles extraction
This commit is contained in:
parent
a803582717
commit
78c50518f9
1 changed file with 13 additions and 15 deletions
|
@ -46,10 +46,8 @@ class TEDIE(InfoExtractor):
|
||||||
'consciousness, but that half the time our brains are '
|
'consciousness, but that half the time our brains are '
|
||||||
'actively fooling us.'),
|
'actively fooling us.'),
|
||||||
'uploader': 'Dan Dennett',
|
'uploader': 'Dan Dennett',
|
||||||
'width': 853,
|
|
||||||
'duration': 1308,
|
'duration': 1308,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
'tags': list,
|
'tags': list,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -77,7 +75,7 @@ class TEDIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Be passionate. Be courageous. Be your best.',
|
'title': 'Be passionate. Be courageous. Be your best.',
|
||||||
'uploader': 'Gabby Giffords and Mark Kelly',
|
'uploader': 'Gabby Giffords and Mark Kelly',
|
||||||
'description': 'md5:5174aed4d0f16021b704120360f72b92',
|
'description': 'md5:37c09e06ce87ddfdb65bf0112ea3551c',
|
||||||
'duration': 1128,
|
'duration': 1128,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -117,7 +115,6 @@ class TEDIE(InfoExtractor):
|
||||||
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
|
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
|
||||||
'uploader': 'Tom Thum',
|
'uploader': 'Tom Thum',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
'tags': list,
|
'tags': list,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -308,30 +305,31 @@ class TEDIE(InfoExtractor):
|
||||||
'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
|
'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
|
||||||
'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
|
'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'subtitles': self._get_subtitles(video_id, talk_info),
|
'subtitles': self._get_subtitles(player_talk),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': float_or_none(talk_info.get('duration')),
|
'duration': float_or_none(talk_info.get('duration')),
|
||||||
'view_count': int_or_none(data.get('viewed_count')),
|
'view_count': int_or_none(data.get('viewed_count')),
|
||||||
'comment_count': int_or_none(
|
|
||||||
try_get(data, lambda x: x['comments']['count'])),
|
|
||||||
'tags': try_get(talk_info, lambda x: x['tags'], list),
|
'tags': try_get(talk_info, lambda x: x['tags'], list),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, talk_info):
|
def _get_subtitles(self, player_talk):
|
||||||
|
language_list = try_get(player_talk, lambda x: x['languages'], list)
|
||||||
|
if not language_list:
|
||||||
|
return {}
|
||||||
|
metadata = try_get(player_talk, lambda x: x['resources']['hls']['metadata'], compat_str) or ''
|
||||||
|
proj_master_id = self._search_regex(r'project_masters/([^/]+)/', metadata, 'project master id', fatal=False)
|
||||||
|
if not proj_master_id:
|
||||||
|
return {}
|
||||||
|
|
||||||
sub_lang_list = {}
|
sub_lang_list = {}
|
||||||
for language in try_get(
|
for language in language_list:
|
||||||
talk_info,
|
|
||||||
(lambda x: x['downloads']['languages'],
|
|
||||||
lambda x: x['languages']), list):
|
|
||||||
lang_code = language.get('languageCode') or language.get('ianaCode')
|
lang_code = language.get('languageCode') or language.get('ianaCode')
|
||||||
if not lang_code:
|
if not lang_code:
|
||||||
continue
|
continue
|
||||||
sub_lang_list[lang_code] = [
|
sub_lang_list[lang_code] = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, lang_code, ext),
|
'url': 'https://hls.ted.com/project_masters/%s/subtitles/%s/full.vtt' % (proj_master_id, lang_code),
|
||||||
'ext': ext,
|
|
||||||
}
|
}
|
||||||
for ext in ['ted', 'srt']
|
|
||||||
]
|
]
|
||||||
return sub_lang_list
|
return sub_lang_list
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue