mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 19:52:11 +00:00
[YouTube] Fix tests
This commit is contained in:
parent
bafb6dec72
commit
e8198c517b
1 changed file with 48 additions and 7 deletions
|
@ -27,6 +27,8 @@ from ..utils import (
|
||||||
dict_get,
|
dict_get,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
@ -38,6 +40,7 @@ from ..utils import (
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
@ -656,6 +659,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'description': 'md5:bf77e03fcae5529475e500129b05668a',
|
'description': 'md5:bf77e03fcae5529475e500129b05668a',
|
||||||
'duration': 177,
|
'duration': 177,
|
||||||
'uploader': 'FlyingKitty',
|
'uploader': 'FlyingKitty',
|
||||||
|
'uploader_id': 'FlyingKitty900',
|
||||||
'upload_date': '20200408',
|
'upload_date': '20200408',
|
||||||
'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
|
'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -678,6 +682,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'description': 'md5:17eccca93a786d51bc67646756894066',
|
'description': 'md5:17eccca93a786d51bc67646756894066',
|
||||||
'duration': 106,
|
'duration': 106,
|
||||||
'uploader': 'Projekt Melody',
|
'uploader': 'Projekt Melody',
|
||||||
|
'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
|
||||||
'upload_date': '20191227',
|
'upload_date': '20191227',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
|
'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
|
||||||
|
@ -929,16 +934,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'id': 'lsguqyKfVQg',
|
'id': 'lsguqyKfVQg',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
|
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
|
||||||
'alt_title': 'Dark Walk - Position Music',
|
'alt_title': 'Dark Walk',
|
||||||
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
|
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
|
||||||
'duration': 133,
|
'duration': 133,
|
||||||
'upload_date': '20151119',
|
'upload_date': '20151119',
|
||||||
'uploader_id': 'IronSoulElf',
|
'uploader_id': 'IronSoulElf',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
|
||||||
'uploader': 'IronSoulElf',
|
'uploader': 'IronSoulElf',
|
||||||
'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
|
||||||
'track': 'Dark Walk - Position Music',
|
'track': 'Dark Walk',
|
||||||
'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
'artist': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
|
||||||
'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
|
'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -2091,7 +2096,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
or microformat.get('lengthSeconds')) \
|
or microformat.get('lengthSeconds')) \
|
||||||
or parse_duration(search_meta('duration'))
|
or parse_duration(search_meta('duration'))
|
||||||
is_live = video_details.get('isLive')
|
is_live = video_details.get('isLive')
|
||||||
owner_profile_url = microformat.get('ownerProfileUrl')
|
|
||||||
|
def gen_owner_profile_url():
|
||||||
|
yield microformat.get('ownerProfileUrl')
|
||||||
|
yield extract_attributes(self._search_regex(
|
||||||
|
r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
|
||||||
|
get_element_by_attribute('itemprop', 'author', webpage),
|
||||||
|
'owner_profile_url', default='')).get('href')
|
||||||
|
|
||||||
|
owner_profile_url = next(
|
||||||
|
(x for x in map(url_or_none, gen_owner_profile_url()) if x),
|
||||||
|
None)
|
||||||
|
|
||||||
if not player_url:
|
if not player_url:
|
||||||
player_url = self._extract_player_url(webpage)
|
player_url = self._extract_player_url(webpage)
|
||||||
|
@ -2176,6 +2191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
info[d_k] = parse_duration(query[k][0])
|
info[d_k] = parse_duration(query[k][0])
|
||||||
|
|
||||||
if video_description:
|
if video_description:
|
||||||
|
# Youtube Music Auto-generated description
|
||||||
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
|
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
|
||||||
if mobj:
|
if mobj:
|
||||||
release_year = mobj.group('release_year')
|
release_year = mobj.group('release_year')
|
||||||
|
@ -2250,7 +2266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
|
lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
|
||||||
info['location'] = stl
|
info['location'] = stl
|
||||||
else:
|
else:
|
||||||
mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
|
# •? doesn't match, but [•]? does; \xa0 = non-breaking space
|
||||||
|
mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl)
|
||||||
if mobj:
|
if mobj:
|
||||||
info.update({
|
info.update({
|
||||||
'series': mobj.group(1),
|
'series': mobj.group(1),
|
||||||
|
@ -2261,7 +2278,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
vpir,
|
vpir,
|
||||||
lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
|
lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
|
||||||
list) or []):
|
list) or []):
|
||||||
tbr = tlb.get('toggleButtonRenderer') or {}
|
tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {}
|
||||||
for getter, regex in [(
|
for getter, regex in [(
|
||||||
lambda x: x['defaultText']['accessibility']['accessibilityData'],
|
lambda x: x['defaultText']['accessibility']['accessibilityData'],
|
||||||
r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
|
r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
|
||||||
|
@ -2315,6 +2332,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
elif mrr_title == 'Song':
|
elif mrr_title == 'Song':
|
||||||
info['track'] = mrr_contents_text
|
info['track'] = mrr_contents_text
|
||||||
|
|
||||||
|
# this is not extraction but spelunking!
|
||||||
|
carousel_lockups = traverse_obj(
|
||||||
|
initial_data,
|
||||||
|
('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
|
||||||
|
'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
|
||||||
|
'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
|
||||||
|
expected_type=dict) or []
|
||||||
|
# try to reproduce logic from metadataRowContainerRenderer above (if it still is)
|
||||||
|
fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
|
||||||
|
# multiple_songs ?
|
||||||
|
if len(carousel_lockups) > 1:
|
||||||
|
fields = fields[-1:]
|
||||||
|
for info_row in traverse_obj(
|
||||||
|
carousel_lockups,
|
||||||
|
(0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
|
||||||
|
expected_type=dict):
|
||||||
|
row_title = traverse_obj(info_row, ('title', 'simpleText'))
|
||||||
|
row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
|
||||||
|
if not row_text:
|
||||||
|
continue
|
||||||
|
for name, field in fields:
|
||||||
|
if name == row_title and not info.get(field):
|
||||||
|
info[field] = row_text
|
||||||
|
|
||||||
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
|
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
|
||||||
v = info.get(s_k)
|
v = info.get(s_k)
|
||||||
if v:
|
if v:
|
||||||
|
|
Loading…
Reference in a new issue