mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-21 09:41:50 +00:00
[cda] Implement birthday verification (closes #12789)
This commit is contained in:
parent
5401bea27f
commit
0c26548601
5 changed files with 125 additions and 9 deletions
|
@ -1,6 +1,7 @@
|
||||||
version <unreleased>
|
version <unreleased>
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
+ [cda] Support birthday verification (#12789)
|
||||||
* [leeco] Fix extraction (#12974)
|
* [leeco] Fix extraction (#12974)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ from youtube_dl.utils import (
|
||||||
limit_length,
|
limit_length,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
month_by_name,
|
month_by_name,
|
||||||
|
multipart_encode,
|
||||||
ohdave_rsa_encrypt,
|
ohdave_rsa_encrypt,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -620,6 +621,16 @@ class TestUtil(unittest.TestCase):
|
||||||
'http://example.com/path', {'test': '第二行тест'})),
|
'http://example.com/path', {'test': '第二行тест'})),
|
||||||
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
||||||
|
|
||||||
|
def test_multipart_encode(self):
    # Plain ASCII field: with a fixed boundary the encoded body is fully
    # predictable and can be compared byte for byte.
    ascii_body = multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0]
    self.assertEqual(
        ascii_body,
        b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')

    # Non-ASCII field name and value, passed in as UTF-8 bytes, are expected
    # to show up unchanged in the encoded body.
    utf8_fields = {'欄位'.encode('utf-8'): '值'.encode('utf-8')}
    utf8_body = multipart_encode(utf8_fields, boundary='AAAAAA')[0]
    self.assertEqual(
        utf8_body,
        b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')

    # An explicitly requested boundary that also occurs inside the data
    # must be rejected with ValueError.
    colliding_fields = {b'field': b'value'}
    self.assertRaises(
        ValueError, multipart_encode, colliding_fields, boundary='value')
|
||||||
|
|
||||||
def test_dict_get(self):
|
def test_dict_get(self):
|
||||||
FALSE_VALUES = {
|
FALSE_VALUES = {
|
||||||
'none': None,
|
'none': None,
|
||||||
|
|
|
@ -9,7 +9,10 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
multipart_encode,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
random_birthday,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
|
||||||
'description': 'md5:269ccd135d550da90d1662651fcb9772',
|
'description': 'md5:269ccd135d550da90d1662651fcb9772',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'average_rating': float,
|
'average_rating': float,
|
||||||
'duration': 39
|
'duration': 39,
|
||||||
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cda.pl/video/57413289',
|
'url': 'http://www.cda.pl/video/57413289',
|
||||||
|
@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
|
||||||
'uploader': 'crash404',
|
'uploader': 'crash404',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'average_rating': float,
|
'average_rating': float,
|
||||||
'duration': 137
|
'duration': 137,
|
||||||
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# Age-restricted
|
||||||
|
'url': 'http://www.cda.pl/video/1273454c4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1273454c4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Bronson (2008) napisy HD 1080p',
|
||||||
|
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
||||||
|
'height': 1080,
|
||||||
|
'uploader': 'boniek61',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 5554,
|
||||||
|
'age_limit': 18,
|
||||||
|
'view_count': int,
|
||||||
|
'average_rating': float,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
    # Submit the birthday validation form for `url` and return whatever
    # self._download_webpage returns for that request. Extra positional and
    # keyword arguments are forwarded to self._download_webpage untouched.

    # 'rok', 'miesiac' and 'dzien' are the form's year, month and day fields.
    birthday_form = random_birthday('rok', 'miesiac', 'dzien')
    birthday_form['return'] = url
    birthday_form['module'] = 'video'
    birthday_form['module_id'] = video_id

    payload, content_type = multipart_encode(birthday_form)

    validate_url = urljoin(url, '/a/validatebirth')
    request_headers = {
        'Referer': url,
        # The content type carries the multipart boundary used in `payload`
        'Content-Type': content_type,
    }
    return self._download_webpage(
        validate_url, video_id, *args,
        data=payload, headers=request_headers, **kwargs)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||||
|
@ -57,6 +89,13 @@ class CDAIE(InfoExtractor):
|
||||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||||
|
|
||||||
|
need_confirm_age = False
|
||||||
|
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
|
||||||
|
webpage, 'birthday validate form', default=None):
|
||||||
|
webpage = self._download_age_confirm_page(
|
||||||
|
url, video_id, note='Confirming age')
|
||||||
|
need_confirm_age = True
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
uploader = self._search_regex(r'''(?x)
|
uploader = self._search_regex(r'''(?x)
|
||||||
|
@ -81,6 +120,7 @@ class CDAIE(InfoExtractor):
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': None,
|
'duration': None,
|
||||||
|
'age_limit': 18 if need_confirm_age else 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
def extract_format(page, version):
|
def extract_format(page, version):
|
||||||
|
@ -121,7 +161,12 @@ class CDAIE(InfoExtractor):
|
||||||
for href, resolution in re.findall(
|
for href, resolution in re.findall(
|
||||||
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
||||||
webpage):
|
webpage):
|
||||||
webpage = self._download_webpage(
|
if need_confirm_age:
|
||||||
|
handler = self._download_age_confirm_page
|
||||||
|
else:
|
||||||
|
handler = self._download_webpage
|
||||||
|
|
||||||
|
webpage = handler(
|
||||||
self._BASE_URL + href, video_id,
|
self._BASE_URL + href, video_id,
|
||||||
'Downloading %s version information' % resolution, fatal=False)
|
'Downloading %s version information' % resolution, fatal=False)
|
||||||
if not webpage:
|
if not webpage:
|
||||||
|
@ -129,6 +174,7 @@ class CDAIE(InfoExtractor):
|
||||||
# invalid version is requested.
|
# invalid version is requested.
|
||||||
self.report_warning('Unable to download %s version information' % resolution)
|
self.report_warning('Unable to download %s version information' % resolution)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
extract_format(webpage, resolution)
|
extract_format(webpage, resolution)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import random
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -11,6 +10,7 @@ from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
qualities,
|
qualities,
|
||||||
|
random_birthday,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
query = random_birthday('birth_year', 'birth_month', 'birth_day')
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
||||||
video_id, query={
|
video_id, query=query)
|
||||||
'birth_month': random.randint(1, 12),
|
|
||||||
'birth_day': random.randint(1, 31),
|
|
||||||
'birth_year': random.randint(1950, 1995),
|
|
||||||
})
|
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@ import contextlib
|
||||||
import ctypes
|
import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
import email.utils
|
import email.utils
|
||||||
|
import email.header
|
||||||
import errno
|
import errno
|
||||||
import functools
|
import functools
|
||||||
import gzip
|
import gzip
|
||||||
|
@ -2097,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
|
||||||
return new_req
|
return new_req
|
||||||
|
|
||||||
|
|
||||||
|
def try_multipart_encode(data, boundary):
    '''
    Encode a dict as multipart/form-data using the given boundary

    data:
        A dict whose keys (field names) and values are either Unicode
        strings, which are encoded as UTF-8, or bytes.
    boundary:
        The boundary as an ASCII-only Unicode string.

    Returns a tuple (body, content_type) where body is the encoded bytes and
    content_type is the matching Content-Type header value.

    Raises ValueError if the boundary occurs inside any encoded field, as
    the resulting body would be ambiguous.
    '''
    content_type = 'multipart/form-data; boundary=%s' % boundary

    # Encode the boundary once instead of once per use
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes

    parts = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        # Plain concatenation is used on purpose: %-formatting of bytes is
        # not available on Python 3.0-3.4 (it was added in 3.5 by PEP 461).
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.extend((delimiter, b'\r\n', content))

    # Closing delimiter
    parts.extend((delimiter, b'--\r\n'))

    return b''.join(parts), content_type
|
||||||
|
|
||||||
|
|
||||||
|
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value). If a boundary
    was specified and it overlaps with the data, the ValueError raised by
    try_multipart_encode is propagated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # Caller-chosen boundary: a single attempt, any ValueError propagates
        out, content_type = try_multipart_encode(data, boundary)
        return out, content_type

    # Random boundary: keep drawing new ones until one does not collide with
    # the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = try_multipart_encode(data, candidate)
        except ValueError:
            continue
        return out, content_type
|
||||||
|
|
||||||
|
|
||||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||||
if isinstance(key_or_keys, (list, tuple)):
|
if isinstance(key_or_keys, (list, tuple)):
|
||||||
for key in key_or_keys:
|
for key in key_or_keys:
|
||||||
|
@ -3760,3 +3813,11 @@ def write_xattr(path, key, value):
|
||||||
"Couldn't find a tool to set the xattrs. "
|
"Couldn't find a tool to set the xattrs. "
|
||||||
"Install either the python 'xattr' module, "
|
"Install either the python 'xattr' module, "
|
||||||
"or the 'xattr' binary.")
|
"or the 'xattr' binary.")
|
||||||
|
|
||||||
|
|
||||||
|
def random_birthday(year_field, month_field, day_field):
    '''
    Generate a random, valid birthday for age verification forms

    year_field, month_field, day_field:
        The names under which the year, month and day are stored in the
        returned dict.

    Returns a dict mapping the three field names to the year, month and day
    as decimal strings without zero padding. The date lies between
    1950-01-01 and 1995-12-31, both inclusive.
    '''
    # Pick a real calendar date instead of drawing month and day
    # independently, which could yield impossible dates such as February 30
    # or April 31.
    start_date = datetime.date(1950, 1, 1)
    end_date = datetime.date(1995, 12, 31)
    offset = random.randint(0, (end_date - start_date).days)
    birthday = start_date + datetime.timedelta(days=offset)
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
|
||||||
|
|
Loading…
Reference in a new issue