Option to determine extractors ahead of time

Sometimes I just want to know whether youtube-dl can be expected to handle a given URL. This option answers that question quickly. Previously, I would run youtube-dl with --simulate or --skip-download, but either one took 5+ seconds on my system before returning. The --determine-extractors option added by this PR, however, takes only 1.3 to 2.6 seconds. You can use it, for example, to handle arbitrary URLs intelligently: if --determine-extractors indicates success (exit status 0), run youtube-dl (or mpv or whatever); otherwise, run $BROWSER. Thank you.
2024-11-15 22:05:39 +00:00 · 2021-10-10 22:12:04 -04:00 · 2021-10-10 22:12:04 -04:00 · df1852a74e
commit df1852a74e
parent a803582717
2 changed files with 22 additions and 0 deletions
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -9,6 +9,7 @@ import codecs
 import io
 import os
 import random
 import re
 import sys
@@ -123,6 +124,23 @@ def _real_main(argv=None):
        table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
        write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.determine_extractors:
        status = 1
        for url in all_urls:
            if re.match(r'^[^\s/]+\.[^\s/]+/', url):  # generic.py
                write_string('The url doesn\'t specify the protocol, trying with http\n', out=sys.stderr)
                url = 'http://' + url
            for ie in list_extractors(opts.age_limit):
                if not ie._WORKING or ie.IE_NAME == 'generic':
                    continue
                try:
                    if re.match(getattr(ie, '_VALID_URL'), url):
                        write_string(ie.IE_NAME + ' ' + url + '\n', out=sys.stdout)
                        status = 0
                        break
                except AttributeError:
                    pass
        sys.exit(status)
    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -161,6 +161,10 @@ def parseOpts(overrideArguments=None):
        '--extractor-descriptions',
        action='store_true', dest='list_extractor_descriptions', default=False,
        help='Output descriptions of all supported extractors')
    general.add_option(
        '--determine-extractors',
        action='store_true', dest='determine_extractors', default=False,
        help='List the extractor that would be used for each URL. Exit status indicates at least one successful match.')
    general.add_option(
        '--force-generic-extractor',
        action='store_true', dest='force_generic_extractor', default=False,