mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-24 19:22:06 +00:00
[options] Added workaround option to execute "n_function"
This commit is contained in:
parent
16f5bbc464
commit
98951bbe76
5 changed files with 98 additions and 1 deletion
|
@ -214,7 +214,10 @@ def t_factory(name, sig_func, url_pattern):
|
||||||
test_id = m.group('id')
|
test_id = m.group('id')
|
||||||
|
|
||||||
def test_func(self):
|
def test_func(self):
|
||||||
basename = 'player-{0}-{1}.js'.format(name, test_id)
|
tn = name
|
||||||
|
if name.endswith('_wd'):
|
||||||
|
tn = name[:-3]
|
||||||
|
basename = 'player-{0}-{1}.js'.format(tn, test_id)
|
||||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||||
|
|
||||||
if not os.path.exists(fn):
|
if not os.path.exists(fn):
|
||||||
|
@ -241,6 +244,10 @@ def n_sig(jscode, sig_input):
|
||||||
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
||||||
|
|
||||||
|
|
||||||
|
def n_sig_wd(jscode, sig_input):
    """Decode sig_input with the player's n function via the 'chrome' webdriver path."""
    extractor = YoutubeIE(FakeYDL())
    return extractor._call_n_function_with_webdriver('chrome', jscode, sig_input)
|
||||||
|
|
||||||
|
|
||||||
make_sig_test = t_factory(
|
make_sig_test = t_factory(
|
||||||
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
|
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
|
||||||
for test_spec in _SIG_TESTS:
|
for test_spec in _SIG_TESTS:
|
||||||
|
@ -251,6 +258,11 @@ make_nsig_test = t_factory(
|
||||||
for test_spec in _NSIG_TESTS:
|
for test_spec in _NSIG_TESTS:
|
||||||
make_nsig_test(*test_spec)
|
make_nsig_test(*test_spec)
|
||||||
|
|
||||||
|
# Register one webdriver-backed test per n-sig test vector.  The '_wd'
# suffix of the name is stripped by t_factory when it builds the player
# file name.
# NOTE(review): these tests go through n_sig_wd, which drives the 'chrome'
# webdriver -- presumably they fail where selenium/chromedriver is not
# installed; confirm whether they should be skipped in that case.
make_nsig_wd_test = t_factory(
    'nsig_wd', n_sig_wd,
    # the dot before "js" is escaped so that it only matches a literal '.'
    re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+\.js$'))
for test_spec in _NSIG_TESTS:
    make_nsig_wd_test(*test_spec)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -419,6 +419,7 @@ def _real_main(argv=None):
|
||||||
'call_home': opts.call_home,
|
'call_home': opts.call_home,
|
||||||
'sleep_interval': opts.sleep_interval,
|
'sleep_interval': opts.sleep_interval,
|
||||||
'max_sleep_interval': opts.max_sleep_interval,
|
'max_sleep_interval': opts.max_sleep_interval,
|
||||||
|
'webdriver': opts.webdriver,
|
||||||
'external_downloader': opts.external_downloader,
|
'external_downloader': opts.external_downloader,
|
||||||
'list_thumbnails': opts.list_thumbnails,
|
'list_thumbnails': opts.list_thumbnails,
|
||||||
'playlist_items': opts.playlist_items,
|
'playlist_items': opts.playlist_items,
|
||||||
|
|
|
@ -2448,6 +2448,11 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import BaseHTTPServer as compat_http_server
|
import BaseHTTPServer as compat_http_server
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urllib.parse import quote as compat_urllib_quote
|
||||||
|
except ImportError: # Python 2
|
||||||
|
from urllib import quote as compat_urllib_quote
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||||
|
@ -3560,6 +3565,7 @@ __all__ = [
|
||||||
'compat_tokenize_tokenize',
|
'compat_tokenize_tokenize',
|
||||||
'compat_urllib_error',
|
'compat_urllib_error',
|
||||||
'compat_urllib_parse',
|
'compat_urllib_parse',
|
||||||
|
'compat_urllib_quote',
|
||||||
'compat_urllib_request',
|
'compat_urllib_request',
|
||||||
'compat_urllib_request_DataHandler',
|
'compat_urllib_request_DataHandler',
|
||||||
'compat_urllib_response',
|
'compat_urllib_response',
|
||||||
|
|
|
@ -9,6 +9,7 @@ import os.path
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
|
import importlib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -19,6 +20,7 @@ from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_parse_qs as compat_parse_qs,
|
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
|
compat_urllib_quote,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_zip as zip,
|
compat_zip as zip,
|
||||||
)
|
)
|
||||||
|
@ -1464,6 +1466,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
super(YoutubeIE, self).__init__(*args, **kwargs)
|
super(YoutubeIE, self).__init__(*args, **kwargs)
|
||||||
self._code_cache = {}
|
self._code_cache = {}
|
||||||
self._player_cache = {}
|
self._player_cache = {}
|
||||||
|
self._webdriver = None
|
||||||
|
|
||||||
|
def __del__(self):
    """Quit the webdriver session, if one was ever started."""
    # getattr(): the finalizer also runs for instances whose __init__
    # raised before the _webdriver attribute was assigned
    driver = getattr(self, '_webdriver', None)
    if driver is None:
        return
    # drop the reference first so the session is never quit twice
    self._webdriver = None
    try:
        driver.quit()
    except Exception:
        # Deliberate best-effort cleanup: nothing can catch an exception
        # raised from a finalizer, it would only be reported on stderr.
        pass
|
||||||
|
|
||||||
# *ytcfgs, webpage=None
|
# *ytcfgs, webpage=None
|
||||||
def _extract_player_url(self, *ytcfgs, **kw_webpage):
|
def _extract_player_url(self, *ytcfgs, **kw_webpage):
|
||||||
|
@ -1633,6 +1640,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
raise ExtractorError('Cannot decrypt nsig without player_url')
|
raise ExtractorError('Cannot decrypt nsig without player_url')
|
||||||
|
|
||||||
|
webdriver_type = self._downloader.params.get('webdriver', None)
|
||||||
|
if webdriver_type is not None:
|
||||||
|
try:
|
||||||
|
jscode = self._load_player(video_id, player_url)
|
||||||
|
ret = self._call_n_function_with_webdriver(webdriver_type, jscode, n)
|
||||||
|
except Exception as e:
|
||||||
|
self.report_warning(
|
||||||
|
'%s (%s %s)' % (
|
||||||
|
'Unable to decode n-parameter: download likely to be throttled',
|
||||||
|
error_to_compat_str(e),
|
||||||
|
traceback.format_exc()),
|
||||||
|
video_id=video_id)
|
||||||
|
return
|
||||||
|
self.write_debug('Decrypted nsig(with webdriver) {0} => {1}'.format(n, ret))
|
||||||
|
return ret
|
||||||
|
|
||||||
try:
|
try:
|
||||||
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
|
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -1656,6 +1679,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
|
self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
def _call_n_function_with_webdriver(self, webdriver_type, jscode, n_param):
    """Evaluate the player's n function on n_param inside a Selenium browser.

    The browser session is created on first use, stored in
    self._webdriver and reused by later calls.

    webdriver_type -- 'firefox', 'chrome', 'edge' or 'safari'
    jscode         -- source text of the player JavaScript
    n_param        -- value handed to the n function

    Returns the value reported by execute_script() for the n function
    call.  Raises ExtractorError for an unknown webdriver_type; errors
    from importing selenium or from the driver propagate unchanged.
    """
    if self._webdriver is None:
        # selenium is only imported when this code path is actually used
        wd = importlib.import_module('selenium.webdriver')
        if webdriver_type == 'firefox':  # geckodriver
            opts = wd.FirefoxOptions()
            # add_argument() instead of the `headless` attribute, which
            # newer Selenium releases no longer honour
            opts.add_argument('-headless')
            # NOTE(review): newer Selenium releases appear to prefer
            # log_output over log_path -- confirm against the supported
            # Selenium version
            svc = wd.firefox.service.Service(log_path=os.path.devnull)
            driver = wd.Firefox(options=opts, service=svc)
        elif webdriver_type == 'chrome':  # chromedriver
            opts = wd.ChromeOptions()
            opts.add_argument('--headless')
            # If you are using the snap version of chromium, chromedriver
            # is included in the snap package.  You should use that driver:
            #   $ cd /snap/bin && sudo ln -s -T chromium.chromedriver chromedriver
            # or
            #   s = wd.chrome.service.Service(executable_path='chromium.chromedriver')
            #   self._webdriver = wd.Chrome(options=o, service=s)
            driver = wd.Chrome(options=opts)
        elif webdriver_type == 'edge':  # msedgedriver
            opts = wd.EdgeOptions()
            opts.add_argument('--headless')
            driver = wd.Edge(options=opts)
        elif webdriver_type == 'safari':  # safaridriver
            # safaridriver does not have a headless mode, but macOS
            # includes safaridriver by default.  To enable automation,
            # run the following command once from an admin terminal:
            #   # safaridriver --enable
            driver = wd.Safari()
        else:
            raise ExtractorError('unsupported webdriver type: %s' % (webdriver_type))
        self._webdriver = driver

    # Load a blank document before every evaluation; intended to keep
    # <script> elements injected by earlier calls from piling up.
    self._webdriver.get('about:blank')

    funcname = self._extract_n_function_name(jscode)
    alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # random global name under which the n function is exposed to the page
    alias = ''.join(random.choice(alphabet) for _ in range(8))

    # The player source and the n value are passed as script arguments
    # (serialised by the driver) rather than pasted into the script text,
    # so quotes or backslashes in them cannot break the JavaScript.
    script = (
        'const code = arguments[0];'
        # The assignment to the alias is spliced in just before the last
        # "}" of the player source -- presumably the end of the wrapper
        # function in whose scope the n function is defined.
        'const p = code.lastIndexOf("}");'
        'const th = code.substring(0, p);'
        'const bh = code.substring(p);'
        'const s = document.createElement("script");'
        's.innerHTML = "var %(alias)s;" + th + ";%(alias)s = %(func)s;" + bh;'
        'document.body.append(s);'
        'return %(alias)s(arguments[1]);'
    ) % {'alias': alias, 'func': funcname}
    return self._webdriver.execute_script(script, jscode, n_param)
|
||||||
|
|
||||||
def _extract_n_function_name(self, jscode):
|
def _extract_n_function_name(self, jscode):
|
||||||
func_name, idx = self._search_regex(
|
func_name, idx = self._search_regex(
|
||||||
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
||||||
|
|
|
@ -576,6 +576,9 @@ def parseOpts(overrideArguments=None):
|
||||||
'Upper bound of a range for randomized sleep before each download '
|
'Upper bound of a range for randomized sleep before each download '
|
||||||
'(maximum possible number of seconds to sleep). Must only be used '
|
'(maximum possible number of seconds to sleep). Must only be used '
|
||||||
'along with --min-sleep-interval.'))
|
'along with --min-sleep-interval.'))
|
||||||
|
workarounds.add_option(
|
||||||
|
'--webdriver', metavar='TYPE', dest='webdriver', default=None,
|
||||||
|
help='Specify webdriver type when you want to use selenium webdriver to execute "n_function" : "firefox", "chrome", "edge", or "safari"')
|
||||||
|
|
||||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
|
|
Loading…
Reference in a new issue