[YouTube] Bypass age-gating for certain restricted videos

* Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client

* Also add and fix tests

* Introduce and use new utility function `update_url()`
This commit is contained in:
dirkf 2022-01-31 04:28:54 +00:00
parent e19ec52322
commit 58988c1421
2 changed files with 168 additions and 45 deletions

View File

@ -42,6 +42,7 @@ from ..utils import (
unescapeHTML,
unified_strdate,
unsmuggle_url,
update_url,
update_url_query,
url_or_none,
urlencode_postdata,
@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
def _call_api(self, ep, query, video_id, fatal=True):
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
data = self._DEFAULT_API_DATA.copy()
data.update(query)
real_headers = {'content-type': 'application/json'}
if headers:
real_headers.update(headers)
return self._download_json(
'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
note='Downloading API JSON', errnote='Unable to download API page',
data=json.dumps(data).encode('utf8'), fatal=fatal,
headers={'content-type': 'application/json'},
headers=real_headers,
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
def _extract_yt_initial_data(self, video_id, webpage):
@ -515,6 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'channel': 'Philipp Hagemeister',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
@ -524,10 +529,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 10,
'view_count': int,
'like_count': int,
'dislike_count': int,
'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
'start_time': 1,
'end_time': 9,
}
},
},
{
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
@ -562,7 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 10,
'view_count': int,
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
@ -621,8 +625,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
},
# Normal age-gate video (No vevo, embed allowed), available via embed page
# Age-gated videos
{
'note': 'Age-gated video (No vevo, embed allowed)',
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
@ -631,17 +636,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'duration': 142,
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
'upload_date': '20140605',
'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
'age_limit': 18,
'categories': ['Gaming'],
'tags': 'count:17',
'channel': 'The Witcher',
'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
'view_count': int,
'like_count': int,
},
},
{
# Age-gated video only available with authentication (unavailable
# via embed page workaround)
'note': 'Age-gated video with embed allowed in public site',
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
'info_dict': {
'id': 'HsUATh_Nc2U',
'ext': 'mp4',
'title': 'Godzilla 2 (Official Video)',
'description': 'md5:bf77e03fcae5529475e500129b05668a',
'duration': 177,
'uploader': 'FlyingKitty',
'upload_date': '20200408',
'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
'age_limit': 18,
'categories': ['Entertainment'],
'tags': ['Flyingkitty', 'godzilla 2'],
'channel': 'FlyingKitty',
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
'view_count': int,
'like_count': int,
},
},
{
'note': 'Age-gated video embedable only with clientScreen=EMBED',
'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
'info_dict': {
'id': 'Tq92D6wQ1mg',
'ext': 'mp4',
'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
'description': 'md5:17eccca93a786d51bc67646756894066',
'duration': 106,
'uploader': 'Projekt Melody',
'upload_date': '20191227',
'age_limit': 18,
'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
'categories': ['Entertainment'],
'channel': 'Projekt Melody',
'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
'view_count': int,
'like_count': int,
},
},
{
'note': 'Non-Age-gated non-embeddable video',
'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
'info_dict': {
'id': 'MeJVWBSsPAY',
'ext': 'mp4',
'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
'description': 'Fan Video. Music & Lyrics by OOMPH!.',
'duration': 210,
'uploader': 'Herr Lurik',
'uploader_id': 'st3in234',
'upload_date': '20130730',
'uploader_url': 'http://www.youtube.com/user/st3in234',
'age_limit': 0,
'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
'categories': ['Music'],
'channel': 'Herr Lurik',
'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
'artist': 'OOMPH!',
'view_count': int,
'like_count': int,
},
},
{
'note': 'Non-bypassable age-gated video',
'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
'only_matching': True,
},
{
'note': 'Age-gated video only available with authentication (not via embed workaround)',
'url': 'XgnwCQzjau8',
'only_matching': True,
'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
},
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
@ -670,17 +755,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'lqQg6PlCWgI',
'ext': 'mp4',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
'duration': 6085,
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympic',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
'uploader': r're:Olympics?',
'age_limit': 0,
'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
'categories': ['Sports'],
'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
'channel': 'Olympics',
'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
'view_count': int,
'like_count': int,
},
'params': {
'skip_download': 'requires avconv',
}
},
# Non-square pixels
{
@ -1683,27 +1774,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = self._call_api(
'player', {'videoId': video_id}, video_id)
playability_status = player_response.get('playabilityStatus') or {}
if playability_status.get('reason') == 'Sign in to confirm your age':
video_info = self._download_webpage(
base_url + 'get_video_info', video_id,
'Refetching age-gated info webpage',
'unable to download video info webpage', query={
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'html5': 1,
# See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
'c': 'TVHTML5',
'cver': '6.20180913',
}, fatal=False)
if video_info:
pr = self._parse_json(
try_get(
compat_parse_qs(video_info),
lambda x: x['player_response'][0], compat_str) or '{}',
video_id, fatal=False)
if pr and isinstance(pr, dict):
player_response = pr
def is_agegated(playability):
if not isinstance(playability, dict):
return
if playability.get('desktopLegacyAgeGateReason'):
return True
reasons = filter(None, (playability.get(r) for r in ('status', 'reason')))
AGE_GATE_REASONS = (
'confirm your age', 'age-restricted', 'inappropriate', # reason
'age_verification_required', 'age_check_required', # status
)
return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
def get_playability_status(response):
return try_get(response, lambda x: x['playabilityStatus'], dict) or {}
playability_status = get_playability_status(player_response)
if (is_agegated(playability_status)
and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
self.report_age_confirmation()
# Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
query = {
'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}},
'contentCheckOk': True,
'racyCheckOk': True,
'context': {
'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
'thirdParty': {'embedUrl': 'https://google.com'},
},
'videoId': video_id,
}
headers = {
'X-YouTube-Client-Name': '85',
'X-YouTube-Client-Version': '2.0',
'Origin': 'https://www.youtube.com'
}
video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
age_gate_status = get_playability_status(video_info)
if age_gate_status.get('status') == 'OK':
player_response = video_info
playability_status = age_gate_status
trailer_video_id = try_get(
playability_status,
@ -1932,12 +2048,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for thumbnail in (try_get(
container,
lambda x: x['thumbnail']['thumbnails'], list) or []):
thumbnail_url = thumbnail.get('url')
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
'height': int_or_none(thumbnail.get('height')),
'url': thumbnail_url,
'url': update_url(thumbnail_url, query=None, fragment=None),
'width': int_or_none(thumbnail.get('width')),
})
if thumbnails:
@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sbr_tooltip = try_get(
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
if sbr_tooltip:
# however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
like_count, dislike_count = sbr_tooltip.split(' / ')
info.update({
'like_count': str_to_int(like_count),
@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'tags': list,
'view_count': int,
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'categories': ['News & Politics'],
'tags': list,
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'categories': ['News & Politics'],
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
@ -3043,8 +3157,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
def _real_extract(self, url):
item_id = self._match_id(url)
url = compat_urlparse.urlunparse(
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
url = update_url(url, netloc='www.youtube.com')
# Handle both video/playlist URLs
qs = parse_qs(url)
video_id = qs.get('v', [None])[0]
@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor):
'categories': ['Nonprofits & Activism'],
'tags': list,
'like_count': int,
'dislike_count': int,
},
'params': {
'noplaylist': True,

View File

@ -4121,6 +4121,17 @@ def update_url_query(url, query):
query=compat_urllib_parse_urlencode(qs, True)))
def update_url(url, **kwargs):
"""Replace URL components specified by kwargs
url: compat_str or parsed URL tuple
returns: compat_str"""
if not kwargs:
return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url
if not isinstance(url, tuple):
url = compat_urlparse.urlparse(url)
return compat_urlparse.urlunparse(url._replace(**kwargs))
def update_Request(req, url=None, data=None, headers={}, query={}):
req_headers = req.headers.copy()
req_headers.update(headers)