From 23ae8277c8f1140d84f67512d9b2e5577c73b590 Mon Sep 17 00:00:00 2001 From: unknown <7951720+u-n-k-n-o-w-n@users.noreply.github.com> Date: Mon, 22 Aug 2022 19:10:12 +0900 Subject: [PATCH] JS operation by webdriver was made into a component --- youtube_dl/extractor/youtube.py | 118 +++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5c5b33010..a79d0629c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -26,6 +26,7 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + check_executable, clean_html, dict_get, error_to_compat_str, @@ -1466,11 +1467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): super(YoutubeIE, self).__init__(*args, **kwargs) self._code_cache = {} self._player_cache = {} - self._webdriver = None - - def __del__(self): - if self._webdriver is not None: - self._webdriver.quit() + self._webdriver_wrapper = None # *ytcfgs, webpage=None def _extract_player_url(self, *ytcfgs, **kw_webpage): @@ -1680,40 +1677,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return ret def _call_n_function_with_webdriver(self, webdriver_type, jscode, n_param): - if self._webdriver is None: - wd = importlib.import_module('selenium.webdriver') - if webdriver_type == 'firefox': # geckodriver - o = wd.FirefoxOptions() - o.headless = True - s = wd.firefox.service.Service(log_path=os.path.devnull) - self._webdriver = wd.Firefox(options=o, service=s) - elif webdriver_type == 'chrome': # chromedriver - o = wd.ChromeOptions() - o.headless = True - """ - If you are using the snap version of the chromium, chromedriver is included in the snap package. - You should use that driver. - $ cd /snap/bin && sudo ln -s -T chromium.chromedriver chromedriver - or - s = wd.chrome.service.Service(executable_path='chromium.chromedriver') - self._webdriver = wd.Chrome(options=o, service=s) - """ - self._webdriver = wd.Chrome(options=o) - elif webdriver_type == 'edge': # msedgedriver - o = wd.EdgeOptions() - o.headless = True - self._webdriver = wd.Edge(options=o) - elif webdriver_type == 'safari': # safaridriver - """ - safaridriver does not have headless-mode. :( - But macOS includes safaridriver by default. - To enable automation on safaridriver, run the following command once from the admin terminal. - # safaridriver --enable - """ - self._webdriver = wd.Safari() - else: - raise ExtractorError('unsupported webdriver type: %s' % (webdriver_type)) - self._webdriver.get('about:blank') + if self._webdriver_wrapper is None: + self._webdriver_wrapper = WebDriverJSWrapper(webdriver_type) + self._webdriver_wrapper.get('about:blank') funcname = self._extract_n_function_name(jscode) alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' dummyfunc = ''.join(random.choice(alphabet) for _ in range(8)) @@ -1728,7 +1694,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'document.body.append(s);' 'return {0}("{2}");' '}})("{3}");').format(dummyfunc, funcname, n_param, compat_urllib_quote(jscode)) - n = self._webdriver.execute_script(f) + n = self._webdriver_wrapper.executeJS(f) return n def _extract_n_function_name(self, jscode): @@ -3884,3 +3850,75 @@ class YoutubeTruncatedIDIE(InfoExtractor): raise ExtractorError( 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url), expected=True) + + +class WebDriverJSWrapper(object): + """WebDriver Wrapper class""" + + def __init__(self, webdriver_type, pageload_timeout=10, script_timeout=5): + self._webdriver = None + try: + wd = importlib.import_module('selenium.webdriver') + except ImportError as e: + self._raise_exception('Failed to import module "selenium.webdriver"', cause=e) + + if webdriver_type == 'firefox': # geckodriver + if not check_executable('geckodriver', ['--version']): + self._raise_exception('geckodriver not found in PATH') + o = wd.FirefoxOptions() + o.headless = True + s = wd.firefox.service.Service(log_path=os.path.devnull) + self._webdriver = wd.Firefox(options=o, service=s) + elif webdriver_type == 'chrome': # chromedriver + if not check_executable('chromedriver', ['--version']): + self._raise_exception('chromedriver not found in PATH') + o = wd.ChromeOptions() + o.headless = True + """ + If you are using the snap version of the chromium, chromedriver is included in the snap package. + You should use that driver. + $ cd /snap/bin && sudo ln -s -T chromium.chromedriver chromedriver + or + s = wd.chrome.service.Service(executable_path='chromium.chromedriver') + self._webdriver = wd.Chrome(options=o, service=s) + """ + self._webdriver = wd.Chrome(options=o) + elif webdriver_type == 'edge': # msedgedriver + if not check_executable('msedgedriver', ['--version']): + self._raise_exception('msedgedriver not found in PATH') + o = wd.EdgeOptions() + o.headless = True + self._webdriver = wd.Edge(options=o) + elif webdriver_type == 'safari': # safaridriver + if not check_executable('safaridriver', ['--version']): + self._raise_exception('safaridriver not found in PATH') + """ + safaridriver does not have headless-mode. :( + But macOS includes safaridriver by default. + To enable automation on safaridriver, run the following command once from the admin terminal. + # safaridriver --enable + """ + self._webdriver = wd.Safari() + else: + self._raise_exception('unsupported type: %s' % (webdriver_type)) + self._webdriver.set_page_load_timeout(pageload_timeout) + self._webdriver.set_script_timeout(script_timeout) + + def __del__(self): + if self._webdriver is not None: + self._webdriver.quit() + + def _raise_exception(self, msg, cause=None): + raise ExtractorError('[WebDriverJSWrapper] %s' % (msg), cause=cause) + + def get(self, url): + """Loads a web page in the current browser session""" + self._webdriver.get(url) + + def executeJS(self, jscode): + """Execute JS and return value""" + try: + ret = self._webdriver.execute_script(jscode) + except Exception as e: + self._raise_exception('Failed to execute JS', cause=e) + return ret