From 71211e7db7243377f862dfdea9a9c3a511df66c2 Mon Sep 17 00:00:00 2001
From: gy-chen
Date: Sat, 23 Mar 2024 23:30:13 +0800
Subject: [PATCH 01/48] [Youtube] Fix unwanted private method __ie_msg in
 f8b0135850

Fixes `AttributeError: no attribute '_YoutubeIE__ie_msg'` if unable to
decode n-parameter

---
 youtube_dl/extractor/youtube.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index db840fc45..90c16e172 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1647,10 +1647,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         except JSInterpreter.Exception as e:
             self.report_warning(
                 '%s (%s %s)' % (
-                    self.__ie_msg(
-                        'Unable to decode n-parameter: download likely to be throttled'),
+                    'Unable to decode n-parameter: download likely to be throttled',
                     error_to_compat_str(e),
-                    traceback.format_exc()))
+                    traceback.format_exc()),
+                video_id=video_id)
             return
         self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))

From 182f63e82a390e138f4a133d3ccb9c838222b02d Mon Sep 17 00:00:00 2001
From: dirkf
Date: Fri, 1 Mar 2024 15:25:44 +0000
Subject: [PATCH 02/48] [compat] Add compat_contextlib_suppress

    with compat_contextlib_suppress(*Exceptions):
        # code that fails silently for any of Exceptions

---
 youtube_dl/compat.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 818ccebd0..637f0d82c 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2943,6 +2943,24 @@ else:
     compat_socket_create_connection = socket.create_connection
 
 
+try:
+    from contextlib import suppress as compat_contextlib_suppress
+except ImportError:
+    class compat_contextlib_suppress(object):
+        _exceptions = None
+
+        def __init__(self, *exceptions):
+            super(compat_contextlib_suppress, self).__init__()
+            # TODO: [Base]ExceptionGroup (3.12+)
+            self._exceptions = exceptions
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            return exc_val is not None and isinstance(exc_val, self._exceptions or tuple())
+
+
 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
 # See http://bugs.python.org/issue9161 for what is broken
 def workaround_optparse_bug9161():
@@ -3263,6 +3281,7 @@ __all__ = [
     'compat_http_cookiejar_Cookie',
     'compat_http_cookies',
     'compat_http_cookies_SimpleCookie',
+    'compat_contextlib_suppress',
     'compat_ctypes_WINFUNCTYPE',
     'compat_etree_fromstring',
     'compat_filter',

From 19dc10b986eeda47975a0e77e84df20ad3b59062 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Fri, 1 Mar 2024 19:53:58 +0000
Subject: [PATCH 03/48] [utils] Apply compat_contextlib_suppress

---
 youtube_dl/utils.py | 66 +++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 42 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 03c73dff3..083446342 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -45,6 +45,7 @@ from .compat import (
     compat_casefold,
     compat_chr,
     compat_collections_abc,
+    compat_contextlib_suppress,
     compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
     compat_datetime_timedelta_total_seconds,
@@ -1855,25 +1856,18 @@ def write_json_file(obj, fn):
     try:
         with tf:
             json.dump(obj, tf)
-        if sys.platform == 'win32':
-            # Need to remove existing file on Windows, else os.rename raises
-            # WindowsError or FileExistsError.
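# Aside: the hunks in this patch replace try/except-pass blocks like the one
# here with the compat_contextlib_suppress context manager added in PATCH 02.
# A minimal usage sketch, assuming only the __exit__ semantics shown in that
# patch (the file name below is illustrative):
import os

from youtube_dl.compat import compat_contextlib_suppress

with compat_contextlib_suppress(OSError):
    os.unlink('no-such-file')  # the OSError raised here is swallowed
# execution resumes here; exception types not listed still propagate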
- try: + with compat_contextlib_suppress(OSError): + if sys.platform == 'win32': + # Need to remove existing file on Windows, else os.rename raises + # WindowsError or FileExistsError. os.unlink(fn) - except OSError: - pass - try: mask = os.umask(0) os.umask(mask) os.chmod(tf.name, 0o666 & ~mask) - except OSError: - pass os.rename(tf.name, fn) except Exception: - try: + with compat_contextlib_suppress(OSError): os.remove(tf.name) - except OSError: - pass raise @@ -2033,14 +2027,13 @@ def extract_attributes(html_element): NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. """ - parser = HTMLAttributeParser() - try: - parser.feed(html_element) - parser.close() - # Older Python may throw HTMLParseError in case of malformed HTML - except compat_HTMLParseError: - pass - return parser.attrs + ret = None + # Older Python may throw HTMLParseError in case of malformed HTML (and on .close()!) + with compat_contextlib_suppress(compat_HTMLParseError): + with contextlib.closing(HTMLAttributeParser()) as parser: + parser.feed(html_element) + ret = parser.attrs + return ret or {} def clean_html(html): @@ -2241,7 +2234,8 @@ def _htmlentity_transform(entity_with_semicolon): numstr = '0%s' % numstr else: base = 10 - # See https://github.com/ytdl-org/youtube-dl/issues/7518 + # See https://github.com/ytdl-org/youtube-dl/issues/7518\ + # Also, weirdly, compat_contextlib_suppress fails here in 2.6 try: return compat_chr(int(numstr, base)) except ValueError: @@ -2348,11 +2342,9 @@ def make_HTTPS_handler(params, **kwargs): # Some servers may (wrongly) reject requests if ALPN extension is not sent. See: # https://github.com/python/cpython/issues/85140 # https://github.com/yt-dlp/yt-dlp/issues/3878 - try: + with compat_contextlib_suppress(AttributeError, NotImplementedError): + # fails for Python < 2.7.10, not ssl.HAS_ALPN ctx.set_alpn_protocols(ALPN_PROTOCOLS) - except (AttributeError, NotImplementedError): - # Python < 2.7.10, not ssl.HAS_ALPN - pass opts_no_check_certificate = params.get('nocheckcertificate', False) if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 @@ -2362,12 +2354,10 @@ def make_HTTPS_handler(params, **kwargs): context.check_hostname = False context.verify_mode = ssl.CERT_NONE - try: + with compat_contextlib_suppress(TypeError): + # Fails with Python 2.7.8 (create_default_context present + # but HTTPSHandler has no context=) return YoutubeDLHTTPSHandler(params, context=context, **kwargs) - except TypeError: - # Python 2.7.8 - # (create_default_context present but HTTPSHandler has no context=) - pass if sys.version_info < (3, 2): return YoutubeDLHTTPSHandler(params, **kwargs) @@ -3176,12 +3166,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): if timezone is None: timezone, date_str = extract_timezone(date_str) - try: + with compat_contextlib_suppress(ValueError): date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) - except ValueError: - pass def date_formats(day_first=True): @@ -3201,17 +3189,13 @@ def unified_strdate(date_str, day_first=True): _, date_str = extract_timezone(date_str) for expression in date_formats(day_first): - try: + with compat_contextlib_suppress(ValueError): upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') - except ValueError: - pass if upload_date is None: timetuple = email.utils.parsedate_tz(date_str) if 
timetuple: - try: + with compat_contextlib_suppress(ValueError): upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') - except ValueError: - pass if upload_date is not None: return compat_str(upload_date) @@ -3240,11 +3224,9 @@ def unified_timestamp(date_str, day_first=True): date_str = m.group(1) for expression in date_formats(day_first): - try: + with compat_contextlib_suppress(ValueError): dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) return calendar.timegm(dt.timetuple()) - except ValueError: - pass timetuple = email.utils.parsedate_tz(date_str) if timetuple: return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone) From 31a15a7c8dfc618e5090948e6b44d27911383d4b Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 10 Mar 2024 19:06:57 +0000 Subject: [PATCH 04/48] [compat] Simplify/fix compat_html_parser_HTMLParseError --- youtube_dl/compat.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 637f0d82c..75dff58f2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2421,23 +2421,20 @@ except ImportError: # Python 2 compat_urllib_request_urlretrieve = compat_urlretrieve try: + from HTMLParser import ( + HTMLParser as compat_HTMLParser, + HTMLParseError as compat_HTMLParseError) +except ImportError: # Python 3 from html.parser import HTMLParser as compat_HTMLParser -except ImportError: # Python 2 - from HTMLParser import HTMLParser as compat_HTMLParser -compat_html_parser_HTMLParser = compat_HTMLParser - -try: # Python 2 - from HTMLParser import HTMLParseError as compat_HTMLParseError -except ImportError: # Python <3.4 try: from html.parser import HTMLParseError as compat_HTMLParseError except ImportError: # Python >3.4 - - # HTMLParseError has been deprecated in Python 3.3 and removed in + # HTMLParseError was deprecated in Python 3.3 and removed in # Python 3.5. 
Introducing dummy exception for Python >3.5 for compatible # and uniform cross-version exception handling class compat_HTMLParseError(Exception): pass +compat_html_parser_HTMLParser = compat_HTMLParser compat_html_parser_HTMLParseError = compat_HTMLParseError try: From d8f134a664d7be2c10aba44fc2d54a8f7b0542ff Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 2 Mar 2024 15:17:09 +0000 Subject: [PATCH 05/48] [downloader/external] Fix "Resource Warning" in downloader test * add compat_subprocess_Popen context manager * apply context manager in FFmpegFD._call_downloader() --- youtube_dl/compat.py | 34 +++++++++++++++++++++++++++--- youtube_dl/downloader/external.py | 35 ++++++++++++++++++------------- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 75dff58f2..53ff2a892 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2438,9 +2438,9 @@ compat_html_parser_HTMLParser = compat_HTMLParser compat_html_parser_HTMLParseError = compat_HTMLParseError try: - from subprocess import DEVNULL - compat_subprocess_get_DEVNULL = lambda: DEVNULL -except ImportError: + _DEVNULL = subprocess.DEVNULL + compat_subprocess_get_DEVNULL = lambda: _DEVNULL +except AttributeError: compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') try: @@ -2958,6 +2958,33 @@ except ImportError: return exc_val is not None and isinstance(exc_val, self._exceptions or tuple()) +# subprocess.Popen context manager +# avoids leaking handles if .communicate() is not called +try: + _Popen = subprocess.Popen + # check for required context manager attributes + _Popen.__enter__ and _Popen.__exit__ + compat_subprocess_Popen = _Popen +except AttributeError: + # not a context manager - make one + from contextlib import contextmanager + + @contextmanager + def compat_subprocess_Popen(*args, **kwargs): + popen = None + try: + popen = _Popen(*args, **kwargs) + yield popen + finally: + if popen: + for f in (popen.stdin, popen.stdout, popen.stderr): + if f: + # repeated .close() is OK, but just in case + with compat_contextlib_suppress(EnvironmentError): + f.close() + popen.wait() + + # Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # See http://bugs.python.org/issue9161 for what is broken def workaround_optparse_bug9161(): @@ -3314,6 +3341,7 @@ __all__ = [ 'compat_struct_pack', 'compat_struct_unpack', 'compat_subprocess_get_DEVNULL', + 'compat_subprocess_Popen', 'compat_tokenize_tokenize', 'compat_urllib_error', 'compat_urllib_parse', diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index bc228960e..f22fa6013 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -11,6 +11,7 @@ from .common import FileDownloader from ..compat import ( compat_setenv, compat_str, + compat_subprocess_Popen, ) from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( @@ -483,21 +484,25 @@ class FFmpegFD(ExternalFD): self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) - try: - retval = proc.wait() - except BaseException as e: - # subprocess.run would send the SIGKILL signal to ffmpeg and the - # mp4 file couldn't be played, but if we ask ffmpeg to quit it - # produces a file that is playable (this is mostly useful for live - # streams). Note that Windows is not affected and produces playable - # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). 
- if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32': - process_communicate_or_kill(proc, b'q') - else: - proc.kill() - proc.wait() - raise + # From [1], a PIPE opened in Popen() should be closed, unless + # .communicate() is called. Avoid leaking any PIPEs by using Popen + # as a context manager (newer Python 3.x and compat) + # Fixes "Resource Warning" in test/test_downloader_external.py + # [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html + with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc: + try: + retval = proc.wait() + except BaseException as e: + # subprocess.run would send the SIGKILL signal to ffmpeg and the + # mp4 file couldn't be played, but if we ask ffmpeg to quit it + # produces a file that is playable (this is mostly useful for live + # streams). Note that Windows is not affected and produces playable + # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32': + process_communicate_or_kill(proc, b'q') + else: + proc.kill() + raise return retval From 21792b88b791b16e3ab0a0fb2e26e5bb8a4e2ff3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 3 Mar 2024 12:38:00 +0000 Subject: [PATCH 06/48] [external/FFmpeg] Fix and improve --ffmpeg-location handling * pass YoutubeDL (FileDownloader) to FFmpegPostProcessor constructor * consolidate path search in FFmpegPostProcessor * make availability of FFmpegFD depend on existence of FFmpegPostProcessor * detect ffmpeg executable on instantiation of FFmpegFD * resolves #32735 --- test/test_downloader_external.py | 16 ++++++++++++- youtube_dl/downloader/external.py | 17 +++++++++----- youtube_dl/postprocessor/ffmpeg.py | 36 +++++++++--------------------- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py index 029f9b05f..4491bd9de 100644 --- a/test/test_downloader_external.py +++ b/test/test_downloader_external.py @@ -18,6 +18,7 @@ from test.helper import ( ) from youtube_dl import YoutubeDL from youtube_dl.compat import ( + compat_contextlib_suppress, compat_http_cookiejar_Cookie, compat_http_server, compat_kwargs, @@ -35,6 +36,9 @@ from youtube_dl.downloader.external import ( HttpieFD, WgetFD, ) +from youtube_dl.postprocessor import ( + FFmpegPostProcessor, +) import threading TEST_SIZE = 10 * 1024 @@ -227,7 +231,17 @@ class TestAria2cFD(unittest.TestCase): self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd) -@ifExternalFDAvailable(FFmpegFD) +# Handle delegated availability +def ifFFmpegFDAvailable(externalFD): + # raise SkipTest, or set False! 
+ avail = ifExternalFDAvailable(externalFD) and False + with compat_contextlib_suppress(Exception): + avail = FFmpegPostProcessor(downloader=None).available + return unittest.skipUnless( + avail, externalFD.get_basename() + ' not found') + + +@ifFFmpegFDAvailable(FFmpegFD) class TestFFmpegFD(unittest.TestCase): _args = [] diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index f22fa6013..4fbc0f520 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -13,7 +13,12 @@ from ..compat import ( compat_str, compat_subprocess_Popen, ) -from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS + +try: + from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS +except ImportError: + FFmpegPostProcessor = None + from ..utils import ( cli_option, cli_valueless_option, @@ -362,13 +367,14 @@ class FFmpegFD(ExternalFD): @classmethod def available(cls): - return FFmpegPostProcessor().available + # actual availability can only be confirmed for an instance + return bool(FFmpegPostProcessor) def _call_downloader(self, tmpfilename, info_dict): - url = info_dict['url'] - ffpp = FFmpegPostProcessor(downloader=self) + # `downloader` means the parent `YoutubeDL` + ffpp = FFmpegPostProcessor(downloader=self.ydl) if not ffpp.available: - self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') + self.report_error('ffmpeg required for download but no ffmpeg (nor avconv) executable could be found. Please install one.') return False ffpp.check_version() @@ -397,6 +403,7 @@ class FFmpegFD(ExternalFD): # if end_time: # args += ['-t', compat_str(end_time - start_time)] + url = info_dict['url'] cookies = self.ydl.cookiejar.get_cookies_for_url(url) if cookies: args.extend(['-cookies', ''.join( diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 801160e6c..e5ffdf378 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -96,6 +96,7 @@ class FFmpegPostProcessor(PostProcessor): self._paths = None self._versions = None + location = None if self._downloader: prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True) location = self._downloader.params.get('ffmpeg_location') @@ -118,32 +119,17 @@ class FFmpegPostProcessor(PostProcessor): location = os.path.dirname(os.path.abspath(location)) if basename in ('ffmpeg', 'ffprobe'): prefer_ffmpeg = True + self._paths = dict( + (p, p if location is None else os.path.join(location, p)) + for p in programs) + self._versions = dict( + x for x in ( + (p, get_ffmpeg_version(self._paths[p])) for p in programs) + if x[1] is not None) - self._paths = dict( - (p, os.path.join(location, p)) for p in programs) - self._versions = dict( - (p, get_ffmpeg_version(self._paths[p])) for p in programs) - if self._versions is None: - self._versions = dict( - (p, get_ffmpeg_version(p)) for p in programs) - self._paths = dict((p, p) for p in programs) - - if prefer_ffmpeg is False: - prefs = ('avconv', 'ffmpeg') - else: - prefs = ('ffmpeg', 'avconv') - for p in prefs: - if self._versions[p]: - self.basename = p - break - - if prefer_ffmpeg is False: - prefs = ('avprobe', 'ffprobe') - else: - prefs = ('ffprobe', 'avprobe') - for p in prefs: - if self._versions[p]: - self.probe_basename = p + for p in ('ffmpeg', 'avconv')[::-1 if prefer_ffmpeg is False else 1]: + if self._versions.get(p): + self.basename = self.probe_basename = p break @property From 
4ea59c6107d63bbabe1de74569ceb984b1646426 Mon Sep 17 00:00:00 2001 From: Ori Avtalion Date: Fri, 5 Apr 2024 17:25:29 +0300 Subject: [PATCH 07/48] [utils] Fix crash in _report_ignoring_subs from c58b655 (#32762) Align `utils.bug_reports_message()` with yt-dlp https://github.com/yt-dlp/yt-dlp/commit/5873d4ccdd, thanks fstirlitz --------- Co-authored-by: dirkf --- youtube_dl/utils.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 083446342..e1b05b307 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2371,15 +2371,24 @@ def make_HTTPS_handler(params, **kwargs): return YoutubeDLHTTPSHandler(params, context=context, **kwargs) -def bug_reports_message(): +def bug_reports_message(before=';'): if ytdl_is_updateable(): update_cmd = 'type youtube-dl -U to update' else: - update_cmd = 'see https://yt-dl.org/update on how to update' - msg = '; please report this issue on https://yt-dl.org/bug .' - msg += ' Make sure you are using the latest version; %s.' % update_cmd - msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' - return msg + update_cmd = 'see https://github.com/ytdl-org/youtube-dl/#user-content-installation on how to update' + + msg = ( + 'please report this issue on https://github.com/ytdl-org/youtube-dl/issues ,' + ' using the appropriate issue template.' + ' Make sure you are using the latest version; %s.' + ' Be sure to call youtube-dl with the --verbose option and include the complete output.' + ) % update_cmd + + before = (before or '').rstrip() + if not before or before.endswith(('.', '!', '?')): + msg = msg[0].title() + msg[1:] + + return (before + ' ' if before else '') + msg class YoutubeDLError(Exception): From e0727e4ab61b6e45f7792546b8b5ff52a0ea22b5 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 7 Apr 2024 15:26:12 +0100 Subject: [PATCH 08/48] [postprocessor/ffmpeg] Fix finding ffprobe (bug in 21792b8) Fixes https://github.com/ytdl-org/youtube-dl/commit/21792b88b791b16e3ab0a0fb2e26e5bb8a4e2ff3#commitcomment-140705274, thx: vonProteus --- youtube_dl/postprocessor/ffmpeg.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index e5ffdf378..214825aa9 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -74,8 +74,11 @@ class FFmpegPostProcessor(PostProcessor): return FFmpegPostProcessor(downloader)._versions def _determine_executables(self): - programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] + # ordered to match prefer_ffmpeg! 
+ convs = ['ffmpeg', 'avconv'] + probes = ['ffprobe', 'avprobe'] prefer_ffmpeg = True + programs = convs + probes def get_ffmpeg_version(path): ver = get_exe_version(path, args=['-version']) @@ -127,10 +130,13 @@ class FFmpegPostProcessor(PostProcessor): (p, get_ffmpeg_version(self._paths[p])) for p in programs) if x[1] is not None) - for p in ('ffmpeg', 'avconv')[::-1 if prefer_ffmpeg is False else 1]: - if self._versions.get(p): - self.basename = self.probe_basename = p - break + basenames = [None, None] + for i, progs in enumerate((convs, probes)): + for p in progs[::-1 if prefer_ffmpeg is False else 1]: + if self._versions.get(p): + basenames[i] = p + break + self.basename, self.probe_basename = basenames @property def available(self): From eb3866543810c7be96f40ec94e0c00df19b93861 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 1 Mar 2024 14:48:30 +0000 Subject: [PATCH 09/48] [YouPorn] Incorporate yt-dlp PR 8827 * from https://github.com/yt-dlp/yt-dlp/pull/8827 * extract from webpage instead of broken API URL * thx The-MAGI --- youtube_dl/extractor/youporn.py | 118 ++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 31e8abb72..4c455b5b8 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -8,6 +8,9 @@ from ..utils import ( extract_attributes, int_or_none, str_to_int, + merge_dicts, + T, + traverse_obj, unified_strdate, url_or_none, ) @@ -15,6 +18,7 @@ from ..utils import ( class YouPornIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' + _EMBED_REGEX = [r']+\bsrc=["\'](?P(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)'] _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'md5': '3744d24c50438cf5b6f6d59feb5055c2', @@ -66,57 +70,91 @@ class YouPornIE(InfoExtractor): }, { 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/', 'only_matching': True, + }, { + 'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/', + 'info_dict': { + 'id': '46949121', + 'age_limit': 18, + 'categories': [], + 'description': None, # SEO spam using title removed + 'display_id': 'tinderspecial-trailer1', + 'duration': 298.0, + 'ext': 'mp4', + 'upload_date': '20201123', + 'uploader': 'Ersties', + 'tags': [], + 'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg', + 'timestamp': 1606147564, + 'title': 'Tinder In Real Life', + 'view_count': int, + } }] - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)', - webpage) + @classmethod + def _extract_urls(cls, webpage): + def yield_urls(): + for p in cls._EMBED_REGEX: + for m in re.finditer(p, webpage): + yield m.group('url') + + return list(yield_urls()) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + display_id = self._match_valid_url(url).group('id', 'display_id') + url = 'http://www.youporn.com/watch/%s' % (display_id[0],) + display_id = display_id[1] or display_id[0] + webpage = self._download_webpage( + url, display_id, headers={'Cookie': 'age_verified=1'}) + video_id = display_id[0] - definitions = self._download_json( - 'https://www.youporn.com/api/video/media_definitions/%s/' % video_id, - display_id) + playervars = self._search_json( + r'\bplayervars\s*:', webpage, 
'playervars', display_id) + + def get_fmt(x): + v_url = url_or_none(x.get('videoUrl')) + if v_url: + x['videoUrl'] = v_url + return (x['format'], x) + + defs_by_format = dict(traverse_obj(playervars, ( + 'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt)))) + + def get_format_data(f): + if f not in defs_by_format: + return [] + return self._download_json( + defs_by_format[f]['videoUrl'], display_id, '{0}-formats'.format(f)) formats = [] - for definition in definitions: - if not isinstance(definition, dict): - continue - video_url = url_or_none(definition.get('videoUrl')) - if not video_url: - continue - f = { - 'url': video_url, - 'filesize': int_or_none(definition.get('videoSize')), - } - height = int_or_none(definition.get('quality')) + # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s + for hls_url in traverse_obj( + get_format_data('hls'), + (lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'), + (Ellipsis, 'videoUrl')): + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls', + entry_protocol='m3u8_native')) + + for f in traverse_obj(get_format_data('mp4'), ( + lambda _, v: v.get('videoUrl'), { + 'url': ('videoUrl', T(url_or_none)), + 'filesize': ('videoSize', T(int_or_none)), + 'height': ('quality', T(int_or_none)), + }, T(lambda x: x.get('videoUrl') and x))): # Video URL's path looks like this: # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /videos/201703/11/109285532/1080P_4000K_109285532.mp4 # We will benefit from it by extracting some metadata - mobj = re.search(r'(?P\d{3,4})[pP]_(?P\d+)[kK]_\d+', video_url) + mobj = re.search(r'(?P\d{3,4})[pP]_(?P\d+)[kK]_\d+', f['videoUrl']) if mobj: - if not height: - height = int(mobj.group('height')) - bitrate = int(mobj.group('bitrate')) - f.update({ - 'format_id': '%dp-%dk' % (height, bitrate), - 'tbr': bitrate, - }) - f['height'] = height + if not f.get('height'): + f['height'] = int(mobj.group('height')) + f['tbr'] = int(mobj.group('bitrate')) + f['format_id'] = '%dp-%dk' % (f['height'], f['tbr']) formats.append(f) self._sort_formats(formats) - webpage = self._download_webpage( - 'http://www.youporn.com/watch/%s' % video_id, display_id, - headers={'Cookie': 'age_verified=1'}) - title = self._html_search_regex( r'(?s)]+class=["\']watchVideoTitle[^>]+>(.+?)', webpage, 'title', default=None) or self._og_search_title( @@ -168,9 +206,12 @@ class YouPornIE(InfoExtractor): r'(?s)Tags:.*?\s*]+class=["\']tagBoxContent["\'][^>]*>(.+?)', 'tags') - return { + data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {} + data.pop('url', None) + + result = merge_dicts(data, { 'id': video_id, - 'display_id': display_id, + 'display_id': display_id if display_id != video_id else None, 'title': title, 'description': description, 'thumbnail': thumbnail, @@ -183,4 +224,5 @@ class YouPornIE(InfoExtractor): 'tags': tags, 'age_limit': age_limit, 'formats': formats, - } + }) + return result From c2766cb80ee9add4d2d2d94b1878e7884c63ceaf Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 11 Apr 2024 12:47:05 +0100 Subject: [PATCH 10/48] [test/test_download] Support 'playlist_maxcount:count' expected value * parallel to `playlist_mincount' * specify both for a range of playlist lengths * if max < min the test will always fail! 
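For example, a playlist whose visible length varies (the YouPorn pages
handled below serve a varying number of ads per page) can assert a range
instead of an exact count. The URL and counts here are illustrative only;
PATCH 12 uses this same pattern in its playlist tests:

    _TESTS = [{
        'url': 'https://www.example.com/playlist/42',
        'info_dict': {'id': '42'},
        'playlist_mincount': 10,
        'playlist_maxcount': 28,
    }]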
--- test/test_download.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/test_download.py b/test/test_download.py index e0bc8cb95..df8b370cf 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( assertGreaterEqual, + assertLessEqual, expect_warnings, get_params, gettestcases, @@ -122,7 +123,10 @@ def generator(test_case, tname): params['outtmpl'] = tname + '_' + params['outtmpl'] if is_playlist and 'playlist' not in test_case: params.setdefault('extract_flat', 'in_playlist') - params.setdefault('playlistend', test_case.get('playlist_mincount')) + params.setdefault('playlistend', + test_case['playlist_maxcount'] + 1 + if test_case.get('playlist_maxcount') + else test_case.get('playlist_mincount')) params.setdefault('skip_download', True) ydl = YoutubeDL(params, auto_init=False) @@ -190,6 +194,14 @@ def generator(test_case, tname): 'Expected at least %d in playlist %s, but got only %d' % ( test_case['playlist_mincount'], test_case['url'], len(res_dict['entries']))) + if 'playlist_maxcount' in test_case: + assertLessEqual( + self, + len(res_dict['entries']), + test_case['playlist_maxcount'], + 'Expected at most %d in playlist %s, but got %d' % ( + test_case['playlist_maxcount'], test_case['url'], + len(res_dict['entries']))) if 'playlist_count' in test_case: self.assertEqual( len(res_dict['entries']), From 0b2ce3685e02ea1a3ccee1026572e081b8f6ac83 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 1 Mar 2024 15:07:30 +0000 Subject: [PATCH 11/48] [YouPorn] Improve extraction * detect unwatchable videos * improve duration extraction * fix count extraction and support large values * detect and remove SEO spam boilerplate description --- youtube_dl/extractor/youporn.py | 64 +++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 4c455b5b8..8f2175dde 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -5,10 +5,13 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, extract_attributes, + ExtractorError, + get_element_by_id, int_or_none, - str_to_int, merge_dicts, + parse_count, T, traverse_obj, unified_strdate, @@ -17,7 +20,13 @@ from ..utils import ( class YouPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' 
+ _VALID_URL = ( + r'youporn:(?P\d+)', + r'''(?x) + https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P\d+) + (?:/(?:(?P[^/?#&]+)/?)?)?(?:[#?]|$) + ''' + ) _EMBED_REGEX = [r']+\bsrc=["\'](?P(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)'] _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', @@ -38,7 +47,7 @@ class YouPornIE(InfoExtractor): 'tags': list, 'age_limit': 18, }, - 'skip': 'This video has been disabled', + 'skip': 'This video has been deactivated', }, { # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', @@ -73,7 +82,7 @@ class YouPornIE(InfoExtractor): }, { 'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/', 'info_dict': { - 'id': '46949121', + 'id': '16290308', 'age_limit': 18, 'categories': [], 'description': None, # SEO spam using title removed @@ -100,15 +109,28 @@ class YouPornIE(InfoExtractor): return list(yield_urls()) def _real_extract(self, url): - display_id = self._match_valid_url(url).group('id', 'display_id') - url = 'http://www.youporn.com/watch/%s' % (display_id[0],) - display_id = display_id[1] or display_id[0] + # A different video ID (data-video-id) is hidden in the page but + # never seems to be used + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + url = 'http://www.youporn.com/watch/%s' % (video_id,) webpage = self._download_webpage( - url, display_id, headers={'Cookie': 'age_verified=1'}) - video_id = display_id[0] + url, video_id, headers={'Cookie': 'age_verified=1'}) + + watchable = self._search_regex( + r'''(]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''', + webpage, 'watchability', default=None) + if not watchable: + msg = re.split(r'\s{4}', clean_html(get_element_by_id( + 'mainContent', webpage)) or '')[0] + raise ExtractorError( + ('%s says: %s' % (self.IE_NAME, msg)) + if msg else 'Video unavailable: no reason found', + expected=True) + # internal ID ? 
+ # video_id = extract_attributes(watchable).get('data-video-id') playervars = self._search_json( - r'\bplayervars\s*:', webpage, 'playervars', display_id) + r'\bplayervars\s*:', webpage, 'playervars', video_id) def get_fmt(x): v_url = url_or_none(x.get('videoUrl')) @@ -123,7 +145,7 @@ class YouPornIE(InfoExtractor): if f not in defs_by_format: return [] return self._download_json( - defs_by_format[f]['videoUrl'], display_id, '{0}-formats'.format(f)) + defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f)) formats = [] # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s @@ -169,8 +191,10 @@ class YouPornIE(InfoExtractor): thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'duration', fatal=False)) + duration = traverse_obj(playervars, ('duration', T(int_or_none))) + if duration is None: + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration', fatal=False)) uploader = self._html_search_regex( r'(?s)]+class=["\']submitByLink["\'][^>]*>(.+?)', @@ -186,11 +210,11 @@ class YouPornIE(InfoExtractor): view_count = None views = self._search_regex( - r'(]+\bclass=["\']js_videoInfoViews["\']>)', webpage, - 'views', default=None) + r'(]*\bdata-value\s*=[^>]+>)\s*', + webpage, 'views', default=None) if views: - view_count = str_to_int(extract_attributes(views).get('data-value')) - comment_count = str_to_int(self._search_regex( + view_count = parse_count(extract_attributes(views).get('data-value')) + comment_count = parse_count(self._search_regex( r'>All [Cc]omments? \(([\d,.]+)\)', webpage, 'comment count', default=None)) @@ -211,7 +235,7 @@ class YouPornIE(InfoExtractor): result = merge_dicts(data, { 'id': video_id, - 'display_id': display_id if display_id != video_id else None, + 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, @@ -225,4 +249,8 @@ class YouPornIE(InfoExtractor): 'age_limit': age_limit, 'formats': formats, }) + # Remove promotional non-description + if result.get('description', '').startswith( + 'Watch %s online' % (result['title'],)): + del result['description'] return result From 668332b9733023ca2e927eeb2208725022248af8 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 11 Apr 2024 13:06:25 +0100 Subject: [PATCH 12/48] [YouPorn] Add playlist extractors * YouPornCategoryIE * YouPornChannelIE * YouPornCollectionIE * YouPornStarIE * YouPornTagIE * YouPornVideosIE, --- youtube_dl/extractor/extractors.py | 10 +- youtube_dl/extractor/youporn.py | 438 +++++++++++++++++++++++++++++ 2 files changed, 447 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a56a7c52f..03d035a27 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1653,7 +1653,15 @@ from .younow import ( YouNowChannelIE, YouNowMomentIE, ) -from .youporn import YouPornIE +from .youporn import ( + YouPornIE, + YouPornCategoryIE, + YouPornChannelIE, + YouPornCollectionIE, + YouPornStarIE, + YouPornTagIE, + YouPornVideosIE, +) from .yourporn import YourPornIE from .yourupload import YourUploadIE from .youtube import ( diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 8f2175dde..ec6125a79 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -1,21 +1,26 @@ # 
coding: utf-8 from __future__ import unicode_literals +import itertools import re +from time import sleep from .common import InfoExtractor from ..utils import ( clean_html, extract_attributes, ExtractorError, + get_element_by_class, get_element_by_id, int_or_none, merge_dicts, parse_count, + parse_qs, T, traverse_obj, unified_strdate, url_or_none, + urljoin, ) @@ -254,3 +259,436 @@ class YouPornIE(InfoExtractor): 'Watch %s online' % (result['title'],)): del result['description'] return result + + +class YouPornListBase(InfoExtractor): + # pattern in '.title-text' element of page section containing videos + _PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$' + _PAGE_RETRY_COUNT = 0 # ie, no retry + _PAGE_RETRY_DELAY = 2 # seconds + + def _get_next_url(self, url, pl_id, html): + return urljoin(url, self._search_regex( + r''']*?\bhref\s*=\s*("|')(?P(?:(?!\1)[^>])+)\1''', + get_element_by_id('next', html) or '', 'next page', + group='url', default=None)) + + @classmethod + def _get_title_from_slug(cls, title_slug): + return re.sub(r'[_-]', ' ', title_slug) + + def _entries(self, url, pl_id, html=None, page_num=None): + + # separates page sections + PLAYLIST_SECTION_RE = ( + r''']*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>''' + ) + # contains video link + VIDEO_URL_RE = r'''(?x) + ]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s* + (?:\s*)* + ]*\bhref\s*=\s*('|")(?P(?:(?!\2)[^>])+)\2 + ''' + + def yield_pages(url, html=html, page_num=page_num): + fatal = not html + for pnum in itertools.count(start=page_num or 1): + if not html: + html = self._download_webpage( + url, pl_id, note='Downloading page %d' % pnum, + fatal=fatal) + if not html: + break + fatal = False + yield (url, html, pnum) + # explicit page: extract just that page + if page_num is not None: + break + next_url = self._get_next_url(url, pl_id, html) + if not next_url or next_url == url: + break + url, html = next_url, None + + def retry_page(msg, tries_left, page_data): + if tries_left <= 0: + return + self.report_warning(msg, pl_id) + sleep(self._PAGE_RETRY_DELAY) + return next( + yield_pages(page_data[0], page_num=page_data[2]), None) + + def yield_entries(html): + for frag in re.split(PLAYLIST_SECTION_RE, html): + if not frag: + continue + t_text = get_element_by_class('title-text', frag or '') + if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)): + continue + for m in re.finditer(VIDEO_URL_RE, frag): + video_url = urljoin(url, m.group('url')) + if video_url: + yield self.url_result(video_url) + + last_first_url = None + for page_data in yield_pages(url, html=html, page_num=page_num): + # page_data: url, html, page_num + first_url = None + tries_left = self._PAGE_RETRY_COUNT + 1 + while tries_left > 0: + tries_left -= 1 + for from_ in yield_entries(page_data[1]): + # may get the same page twice instead of empty page + # or (site bug) intead of actual next page + if not first_url: + first_url = from_['url'] + if first_url == last_first_url: + # sometimes (/porntags/) the site serves the previous page + # instead but may provide the correct page after a delay + page_data = retry_page( + 'Retrying duplicate page...', tries_left, page_data) + if page_data: + first_url = None + break + continue + yield from_ + else: + if not first_url and 'no-result-paragarph1' in page_data[1]: + page_data = retry_page( + 'Retrying empty page...', tries_left, page_data) + if page_data: + continue + else: + # success/failure + break + # may get an infinite (?) 
sequence of empty pages + if not first_url: + break + last_first_url = first_url + + def _real_extract(self, url, html=None): + # exceptionally, id may be None + m_dict = self._match_valid_url(url).groupdict() + pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort')) + + qs = parse_qs(url) + for q, v in qs.items(): + if v: + qs[q] = v[-1] + else: + del qs[q] + + base_id = pl_id or 'YouPorn' + title = self._get_title_from_slug(base_id) + if page_type: + title = '%s %s' % (page_type.capitalize(), title) + base_id = [base_id.lower()] + if sort is None: + title += ' videos' + else: + title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort)) + base_id.append(sort) + if qs: + ps = ['%s=%s' % item for item in sorted(qs.items())] + title += ' (%s)' % ','.join(ps) + base_id.extend(ps) + pl_id = '/'.join(base_id) + + return self.playlist_result( + self._entries(url, pl_id, html=html, + page_num=int_or_none(qs.get('page'))), + playlist_id=pl_id, playlist_title=title) + + +class YouPornCategoryIE(YouPornListBase): + IE_DESC = 'YouPorn category, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?Pcategory)/(?P[^/?#&]+) + (?:/(?Ppopular|views|rating|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/category/lingerie/popular/', + 'info_dict': { + 'id': 'lingerie/popular', + 'title': 'Category lingerie videos by popular', + }, + 'playlist_mincount': 39, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10', + 'info_dict': { + 'id': 'lingerie/duration/min_minutes=10', + 'title': 'Category lingerie videos by duration (min_minutes=10)', + }, + 'playlist_maxcount': 30, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/category/lingerie/popular?page=1', + 'info_dict': { + 'id': 'lingerie/popular/page=1', + 'title': 'Category lingerie videos by popular (page=1)', + }, + 'playlist_count': 30, + }] + + +class YouPornChannelIE(YouPornListBase): + IE_DESC = 'YouPorn channel, with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?Pchannel)/(?P[^/?#&]+) + (?:/(?Prating|views|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/channel/x-feeds/', + 'info_dict': { + 'id': 'x-feeds', + 'title': 'Channel X-Feeds videos', + }, + 'playlist_mincount': 37, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1', + 'info_dict': { + 'id': 'x-feeds/duration/page=1', + 'title': 'Channel X-Feeds videos by duration (page=1)', + }, + 'playlist_count': 24, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + +class YouPornCollectionIE(YouPornListBase): + IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?Pcollection)s/videos/(?P\d+) + (?:/(?Prating|views|time|duration))?/?(?:[#?]|$) + ''' + _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/collections/videos/33044251/', + 'info_dict': { + 'id': '33044251', + 'title': 'Collection Sexy Lips videos', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_mincount': 50, + }, { + 'note': 'Single page of full list (no filters 
here)', + 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1', + 'info_dict': { + 'id': '33044251/time/page=1', + 'title': 'Collection Sexy Lips videos by time (page=1)', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html) + infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class( + 'collection-infos', html)) or '') + title, uploader = self._search_regex( + r'^\s*Collection: (?P.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)', + infos, 'title/uploader', group=('title', 'uploader'), default=(None, None)) + + return merge_dicts({ + 'title': playlist['title'].replace(playlist['id'].split('/')[0], title), + 'uploader': uploader, + }, playlist) if title else playlist + + +class YouPornTagIE(YouPornListBase): + IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + porn(?P<type>tag)s/(?P<id>[^/?#&]+) + (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$) + ''' + _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s' + _PAGE_RETRY_COUNT = 1 + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/porntags/austrian', + 'info_dict': { + 'id': 'austrian', + 'title': 'Tag austrian videos', + }, + 'playlist_mincount': 35, + 'expected_warnings': ['Retrying duplicate page'], + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10', + 'info_dict': { + 'id': 'austrian/duration/min_minutes=10', + 'title': 'Tag austrian videos by duration (min_minutes=10)', + }, + # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3, + # or more, varying with number of ads; let's set max as 9x4 + # NB col 1 may not be shown in non-JS page with site CSS and zoom 100% + 'playlist_maxcount': 32, + 'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'], + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/porntags/austrian/?page=1', + 'info_dict': { + 'id': 'austrian/page=1', + 'title': 'Tag austrian videos (page=1)', + }, + 'playlist_mincount': 32, + 'playlist_maxcount': 34, + 'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'], + }] + + # YP tag navigation is broken, loses sort + def _get_next_url(self, url, pl_id, html): + next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html) + if next_url: + n = self._match_valid_url(next_url) + if n: + s = n.groupdict().get('sort') + if s: + u = self._match_valid_url(url) + if u: + u = u.groupdict().get('sort') + if s and not u: + n = n.end('sort') + next_url = next_url[:n] + '/' + u + next_url[n:] + return next_url + + +class YouPornStarIE(YouPornListBase): + IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>pornstar)/(?P<id>[^/?#&]+) + (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$) + ''' + _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/pornstar/daynia/', + 'info_dict': { + 'id': 'daynia', + 'title': 'Pornstar Daynia videos', + 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+', + }, + 'playlist_mincount': 45, + }, { + 'note': 'Single page of 
full list (no filters here)', + 'url': 'https://www.youporn.com/pornstar/daynia/?page=1', + 'info_dict': { + 'id': 'daynia/page=1', + 'title': 'Pornstar Daynia videos (page=1)', + 'description': 're:.{180,}', + }, + 'playlist_count': 26, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super(YouPornStarIE, self)._real_extract(url, html=html) + INFO_ELEMENT_RE = r'''(?x) + <div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*> + (?P<info>[\s\S]+?)(?:</div>\s*){6,} + ''' + + infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='') + if infos: + infos = re.sub( + r'(?:\s*nl=nl)+\s*', ' ', + re.sub(r'(?u)\s+', ' ', clean_html( + re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '') + + return merge_dicts({ + 'description': infos.strip() or None, + }, playlist) + + +class YouPornVideosIE(YouPornListBase): + IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?:(?P<id>browse)/)? + (?P<sort>(?(id) + (?:duration|rating|time|views)| + (?:most_(?:favou?rit|view)ed|recommended|top_rated)?)) + (?:[/#?]|$) + ''' + _PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$' + _TESTS = [{ + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/', + 'info_dict': { + 'id': 'youporn', + 'title': 'YouPorn videos', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/recommended', + 'info_dict': { + 'id': 'youporn/recommended', + 'title': 'YouPorn videos by recommended', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/top_rated', + 'info_dict': { + 'id': 'youporn/top_rated', + 'title': 'YouPorn videos by top rated', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/browse/time', + 'info_dict': { + 'id': 'browse/time', + 'title': 'YouPorn videos by time', + }, + 'only_matching': True, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=2/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)', + }, + 'playlist_mincount': 10, + 'playlist_maxcount': 28, + }, { + 'note': 'Filtered paginated list with several pages', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=5/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)', + }, + 'playlist_mincount': 45, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/browse/time?page=1', + 'info_dict': { + 'id': 'browse/time/page=1', + 'title': 'YouPorn videos by time (page=1)', + }, + 'playlist_count': 36, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return 'YouPorn' if title_slug == 'browse' else title_slug From a08f2b7e4567cdc50c0614ee0a4ffdff49b8b6e6 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 15 May 2024 16:57:59 +0100 Subject: [PATCH 13/48] [workflows/ci.yml] Temporary workaround for Python 3.5 _pip_ failures 
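pip reads PIP_TRUSTED_HOST as if each listed host had been passed with its
--trusted-host option, so the index and download hosts are still used where
the aged Python 3.5 toolchain fails TLS certificate verification; see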
https://github.com/actions/setup-python/issues/866 --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93562afd7..d3b9ae016 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -159,6 +159,9 @@ jobs: # wrap broken actions/setup-python@v4 # NB may run apt-get install in Linux uses: ytdl-org/setup-python@v1 + env: + # Temporary workaround for Python 3.5 failures - May 2024 + PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org" with: python-version: ${{ matrix.python-version }} cache-build: true From 06da64ee51cd405b9392ba484cf7d3d31a88ee30 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sun, 21 Apr 2024 23:42:08 +0100 Subject: [PATCH 14/48] [utils] Update traverse_obj() from yt-dlp * remove `is_user_input` option per https://github.com/yt-dlp/yt-dlp/pull/8673 * support traversal of compat_xml_etree_ElementTree_Element per https://github.com/yt-dlp/yt-dlp/pull/8911 * allow un/branching using all and any per https://github.com/yt-dlp/yt-dlp/pull/9571 * support traversal of compat_cookies.Morsel and multiple types in `set()` keys per https://github.com/yt-dlp/yt-dlp/pull/9577 thx Grub4k for these * also, move traversal tests to a separate class * allow for unordered dicts in tests for Py<3.7 --- test/test_utils.py | 257 +++++++++++++++++++++++++++++++------------ youtube_dl/compat.py | 9 +- youtube_dl/utils.py | 102 +++++++++++------ 3 files changed, 267 insertions(+), 101 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index ca36909a8..179d21cf5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -123,6 +123,7 @@ from youtube_dl.compat import ( compat_chr, compat_etree_fromstring, compat_getenv, + compat_http_cookies, compat_os_name, compat_setenv, compat_str, @@ -132,10 +133,6 @@ from youtube_dl.compat import ( class TestUtil(unittest.TestCase): - # yt-dlp shim - def assertCountEqual(self, expected, got, msg='count should be the same'): - return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg) - def test_timeconvert(self): self.assertTrue(timeconvert('') is None) self.assertTrue(timeconvert('bougrg') is None) @@ -740,28 +737,6 @@ class TestUtil(unittest.TestCase): self.assertRaises( ValueError, multipart_encode, {b'field': b'value'}, boundary='value') - def test_dict_get(self): - FALSE_VALUES = { - 'none': None, - 'false': False, - 'zero': 0, - 'empty_string': '', - 'empty_list': [], - } - d = FALSE_VALUES.copy() - d['a'] = 42 - self.assertEqual(dict_get(d, 'a'), 42) - self.assertEqual(dict_get(d, 'b'), None) - self.assertEqual(dict_get(d, 'b', 42), 42) - self.assertEqual(dict_get(d, ('a', )), 42) - self.assertEqual(dict_get(d, ('b', 'a', )), 42) - self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) - self.assertEqual(dict_get(d, ('b', 'c', )), None) - self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) - for key, false_value in FALSE_VALUES.items(): - self.assertEqual(dict_get(d, ('b', 'c', key, )), None) - self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) - def test_merge_dicts(self): self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) @@ -1703,24 +1678,46 @@ Line 1 self.assertEqual(variadic('spam', allowed_types=dict), 'spam') self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam') + def test_join_nonempty(self): + self.assertEqual(join_nonempty('a', 'b'), 'a-b') + 
self.assertEqual(join_nonempty( + 'a', 'b', 'c', 'd', + from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d') + + +class TestTraversal(unittest.TestCase): + str = compat_str + _TEST_DATA = { + 100: 100, + 1.2: 1.2, + 'str': 'str', + 'None': None, + '...': Ellipsis, + 'urls': [ + {'index': 0, 'url': 'https://www.example.com/0'}, + {'index': 1, 'url': 'https://www.example.com/1'}, + ], + 'data': ( + {'index': 2}, + {'index': 3}, + ), + 'dict': {}, + } + + # yt-dlp shim + def assertCountEqual(self, expected, got, msg='count should be the same'): + return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg) + + def assertMaybeCountEqual(self, *args, **kwargs): + if sys.version_info < (3, 7): + # random dict order + return self.assertCountEqual(*args, **kwargs) + else: + return self.assertEqual(*args, **kwargs) + def test_traverse_obj(self): - str = compat_str - _TEST_DATA = { - 100: 100, - 1.2: 1.2, - 'str': 'str', - 'None': None, - '...': Ellipsis, - 'urls': [ - {'index': 0, 'url': 'https://www.example.com/0'}, - {'index': 1, 'url': 'https://www.example.com/1'}, - ], - 'data': ( - {'index': 2}, - {'index': 3}, - ), - 'dict': {}, - } + str = self.str + _TEST_DATA = self._TEST_DATA # define a pukka Iterable def iter_range(stop): @@ -1771,15 +1768,19 @@ Line 1 # Test set as key (transformation/type, like `expected_type`) self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'], msg='Function in set should be a transformation') + self.assertEqual(traverse_obj(_TEST_DATA, ('fail', T(lambda _: 'const'))), 'const', + msg='Function in set should always be called') self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'], msg='Type in set should be a type filter') + self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str, int))), [100, 'str'], + msg='Multiple types in set should be a type filter') self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA, msg='A single set should be wrapped into a path') self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'], msg='Transformation function should not raise') - self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))), - [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], - msg='Function in set should be a transformation') + self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))), + [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], + msg='Function in set should be a transformation') if __debug__: with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): traverse_obj(_TEST_DATA, set()) @@ -1992,23 +1993,6 @@ Line 1 self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [], msg='branching should result in list if `traverse_string`') - # Test is_user_input behavior - _IS_USER_INPUT_DATA = {'range8': list(range(8))} - self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'), - _is_user_input=True), 3, - msg='allow for string indexing if `is_user_input`') - self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'), - _is_user_input=True), tuple(range(8))[3:], - msg='allow for string slice if `is_user_input`') - self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'), - _is_user_input=True), tuple(range(8))[:4:2], - msg='allow step in string slice if `is_user_input`') - self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'), - _is_user_input=True), range(8), - msg='`:` 
should be treated as `...` if `is_user_input`') - with self.assertRaises(TypeError, msg='too many params should result in error'): - traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True) - # Test re.Match as input obj mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123') self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None], @@ -2030,14 +2014,151 @@ Line 1 self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], msg='function on a `re.Match` should give group name as well') + # Test xml.etree.ElementTree.Element as input obj + etree = compat_etree_fromstring('''<?xml version="1.0"?> + <data> + <country name="Liechtenstein"> + <rank>1</rank> + <year>2008</year> + <gdppc>141100</gdppc> + <neighbor name="Austria" direction="E"/> + <neighbor name="Switzerland" direction="W"/> + </country> + <country name="Singapore"> + <rank>4</rank> + <year>2011</year> + <gdppc>59900</gdppc> + <neighbor name="Malaysia" direction="N"/> + </country> + <country name="Panama"> + <rank>68</rank> + <year>2011</year> + <gdppc>13600</gdppc> + <neighbor name="Costa Rica" direction="W"/> + <neighbor name="Colombia" direction="E"/> + </country> + </data>''') + self.assertEqual(traverse_obj(etree, ''), etree, + msg='empty str key should return the element itself') + self.assertEqual(traverse_obj(etree, 'country'), list(etree), + msg='str key should return all children with that tag name') + self.assertEqual(traverse_obj(etree, Ellipsis), list(etree), + msg='`...` as key should return all children') + self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]], + msg='function as key should get element as value') + self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]], + msg='function as key should get index as key') + self.assertEqual(traverse_obj(etree, 0), etree[0], + msg='int key should return the nth child') + self.assertEqual(traverse_obj(etree, './/neighbor/@name'), + ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'], + msg='`@<attribute>` at end of path should give that attribute') + self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None], + msg='`@<nonexistent>` at end of path should give `None`') + self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'}, + msg='`@` should give the full attribute dict') + self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'], + msg='`text()` at end of path should give the inner text') + self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'], + msg='full python xpath features should be supported') + self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein', + msg='special transformations should act on current element') + self.assertEqual(traverse_obj(etree, ('country', 0, Ellipsis, 'text()', T(int_or_none))), [1, 2008, 141100], + msg='special transformations should act on current element') + + def test_traversal_unbranching(self): + # str = self.str + _TEST_DATA = self._TEST_DATA + + self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), all]), [100, 1.2], + msg='`all` should give all results as list') + self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), any]), 100, + msg='`any` should give the first result') + self.assertEqual(traverse_obj(_TEST_DATA, [100, all]), [100], + msg='`all` should give list if non branching') + self.assertEqual(traverse_obj(_TEST_DATA, [100, any]), 100, + msg='`any` 
should give single item if non branching') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]), [100], + msg='`all` should filter `None` and empty dict') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]), 100, + msg='`any` should filter `None` and empty dict') + self.assertEqual(traverse_obj(_TEST_DATA, [{ + 'all': [('dict', 'None', 100, 1.2), all], + 'any': [('dict', 'None', 100, 1.2), any], + }]), {'all': [100, 1.2], 'any': 100}, + msg='`all`/`any` should apply to each dict path separately') + self.assertEqual(traverse_obj(_TEST_DATA, [{ + 'all': [('dict', 'None', 100, 1.2), all], + 'any': [('dict', 'None', 100, 1.2), any], + }], get_all=False), {'all': [100, 1.2], 'any': 100}, + msg='`all`/`any` should apply to dict regardless of `get_all`') + self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, T(float)]), None, + msg='`all` should reset branching status') + self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, T(float)]), None, + msg='`any` should reset branching status') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, Ellipsis, T(float)]), [1.2], + msg='`all` should allow further branching') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, Ellipsis, 'index']), [0, 1], + msg='`any` should allow further branching') + + def test_traversal_morsel(self): + values = { + 'expires': 'a', + 'path': 'b', + 'comment': 'c', + 'domain': 'd', + 'max-age': 'e', + 'secure': 'f', + 'httponly': 'g', + 'version': 'h', + 'samesite': 'i', + } + # SameSite added in Py3.8, breaks .update for 3.5-3.7 + if sys.version_info < (3, 8): + del values['samesite'] + morsel = compat_http_cookies.Morsel() + morsel.set(str('item_key'), 'item_value', 'coded_value') + morsel.update(values) + values['key'] = str('item_key') + values['value'] = 'item_value' + values = dict((str(k), v) for k, v in values.items()) + # make test pass even without ordered dict + value_set = set(values.values()) + + for key, value in values.items(): + self.assertEqual(traverse_obj(morsel, key), value, + msg='Morsel should provide access to all values') + self.assertEqual(set(traverse_obj(morsel, Ellipsis)), value_set, + msg='`...` should yield all values') + self.assertEqual(set(traverse_obj(morsel, lambda k, v: True)), value_set, + msg='function key should yield all values') + self.assertIs(traverse_obj(morsel, [(None,), any]), morsel, + msg='Morsel should not be implicitly changed to dict on usage') + def test_get_first(self): self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam') - def test_join_nonempty(self): - self.assertEqual(join_nonempty('a', 'b'), 'a-b') - self.assertEqual(join_nonempty( - 'a', 'b', 'c', 'd', - from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d') + def test_dict_get(self): + FALSE_VALUES = { + 'none': None, + 'false': False, + 'zero': 0, + 'empty_string': '', + 'empty_list': [], + } + d = FALSE_VALUES.copy() + d['a'] = 42 + self.assertEqual(dict_get(d, 'a'), 42) + self.assertEqual(dict_get(d, 'b'), None) + self.assertEqual(dict_get(d, 'b', 42), 42) + self.assertEqual(dict_get(d, ('a', )), 42) + self.assertEqual(dict_get(d, ('b', 'a', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', )), None) + self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) + for key, false_value in FALSE_VALUES.items(): + self.assertEqual(dict_get(d, ('b', 'c', key, )), None) + self.assertEqual(dict_get(d, ('b', 
'c', key, ), skip_false_values=False), false_value) if __name__ == '__main__': diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 53ff2a892..d5485c7e8 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2719,8 +2719,14 @@ if sys.version_info < (2, 7): if isinstance(xpath, compat_str): xpath = xpath.encode('ascii') return xpath + + def compat_etree_iterfind(element, match): + for from_ in element.findall(match): + yield from_ + else: compat_xpath = lambda xpath: xpath + compat_etree_iterfind = lambda element, match: element.iterfind(match) compat_os_name = os._name if os.name == 'java' else os.name @@ -2955,7 +2961,7 @@ except ImportError: return self def __exit__(self, exc_type, exc_val, exc_tb): - return exc_val is not None and isinstance(exc_val, self._exceptions or tuple()) + return exc_type is not None and issubclass(exc_type, self._exceptions or tuple()) # subprocess.Popen context manager @@ -3308,6 +3314,7 @@ __all__ = [ 'compat_contextlib_suppress', 'compat_ctypes_WINFUNCTYPE', 'compat_etree_fromstring', + 'compat_etree_iterfind', 'compat_filter', 'compat_get_terminal_size', 'compat_getenv', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e1b05b307..cd4303566 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -49,11 +49,14 @@ from .compat import ( compat_cookiejar, compat_ctypes_WINFUNCTYPE, compat_datetime_timedelta_total_seconds, + compat_etree_Element, compat_etree_fromstring, + compat_etree_iterfind, compat_expanduser, compat_html_entities, compat_html_entities_html5, compat_http_client, + compat_http_cookies, compat_integer_types, compat_kwargs, compat_ncompress as ncompress, @@ -6253,15 +6256,16 @@ if __debug__: def traverse_obj(obj, *paths, **kwargs): """ - Safely traverse nested `dict`s and `Iterable`s + Safely traverse nested `dict`s and `Iterable`s, etc >>> obj = [{}, {"key": "value"}] >>> traverse_obj(obj, (1, "key")) - "value" + 'value' Each of the provided `paths` is tested and the first producing a valid result will be returned. The next path will also be tested if the path branched but no results could be found. - Supported values for traversal are `Mapping`, `Iterable` and `re.Match`. + Supported values for traversal are `Mapping`, `Iterable`, `re.Match`, `xml.etree.ElementTree` + (xpath) and `http.cookies.Morsel`. Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded. The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`. @@ -6269,8 +6273,9 @@ def traverse_obj(obj, *paths, **kwargs): The keys in the path can be one of: - `None`: Return the current object. - `set`: Requires the only item in the set to be a type or function, - like `{type}`/`{func}`. If a `type`, returns only values - of this type. If a function, returns `func(obj)`. + like `{type}`/`{type, type, ...}`/`{func}`. If one or more `type`s, + return only values that have one of the types. If a function, + return `func(obj)`. - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`. - `slice`: Branch out and return all values in `obj[key]`. - `Ellipsis`: Branch out and return a list of all values. @@ -6282,8 +6287,10 @@ def traverse_obj(obj, *paths, **kwargs): For `Iterable`s, `key` is the enumeration count of the value. For `re.Match`es, `key` is the group number (0 = full match) as well as additionally any group names, if given. - - `dict` Transform the current object and return a matching dict. + - `dict`: Transform the current object and return a matching dict. 
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. + - `any`-builtin: Take the first matching object and return it, resetting branching. + - `all`-builtin: Take all matching objects and return them as a list, resetting branching. `tuple`, `list`, and `dict` all support nested paths and branches. @@ -6299,10 +6306,8 @@ def traverse_obj(obj, *paths, **kwargs): @param get_all If `False`, return the first matching result, otherwise all matching ones. @param casesense If `False`, consider string dictionary keys as case insensitive. - The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API + The following is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API - @param _is_user_input Whether the keys are generated from user input. - If `True` strings get converted to `int`/`slice` if needed. @param _traverse_string Whether to traverse into objects as strings. If `True`, any non-compatible object will first be converted into a string and then traversed into. @@ -6322,7 +6327,6 @@ def traverse_obj(obj, *paths, **kwargs): expected_type = kwargs.get('expected_type') get_all = kwargs.get('get_all', True) casesense = kwargs.get('casesense', True) - _is_user_input = kwargs.get('_is_user_input', False) _traverse_string = kwargs.get('_traverse_string', False) # instant compat @@ -6336,10 +6340,8 @@ def traverse_obj(obj, *paths, **kwargs): type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,)) def lookup_or_none(v, k, getter=None): - try: + with compat_contextlib_suppress(LookupError): return getter(v, k) if getter else v[k] - except IndexError: - return None def from_iterable(iterables): # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F @@ -6361,12 +6363,13 @@ def traverse_obj(obj, *paths, **kwargs): result = obj elif isinstance(key, set): - assert len(key) == 1, 'Set should only be used to wrap a single item' - item = next(iter(key)) - if isinstance(item, type): - result = obj if isinstance(obj, item) else None + assert len(key) >= 1, 'At least one item is required in a `set` key' + if all(isinstance(item, type) for item in key): + result = obj if isinstance(obj, tuple(key)) else None else: - result = try_call(item, args=(obj,)) + item = next(iter(key)) + assert len(key) == 1, 'Multiple items in a `set` key must all be types' + result = try_call(item, args=(obj,)) if not isinstance(item, type) else None elif isinstance(key, (list, tuple)): branching = True @@ -6375,9 +6378,11 @@ def traverse_obj(obj, *paths, **kwargs): elif key is Ellipsis: branching = True + if isinstance(obj, compat_http_cookies.Morsel): + obj = dict(obj, key=obj.key, value=obj.value) if isinstance(obj, compat_collections_abc.Mapping): result = obj.values() - elif is_iterable_like(obj): + elif is_iterable_like(obj, (compat_collections_abc.Iterable, compat_etree_Element)): result = obj elif isinstance(obj, compat_re_Match): result = obj.groups() @@ -6389,9 +6394,11 @@ def traverse_obj(obj, *paths, **kwargs): elif callable(key): branching = True + if isinstance(obj, compat_http_cookies.Morsel): + obj = dict(obj, key=obj.key, value=obj.value) if isinstance(obj, compat_collections_abc.Mapping): iter_obj = obj.items() - elif is_iterable_like(obj): + elif is_iterable_like(obj, (compat_collections_abc.Iterable, compat_etree_Element)): iter_obj = enumerate(obj) elif isinstance(obj, compat_re_Match): iter_obj = itertools.chain( @@ -6413,6 +6420,8 @@ def traverse_obj(obj, *paths, **kwargs): if v is not None or default is not NO_DEFAULT) 
or None elif isinstance(obj, compat_collections_abc.Mapping): + if isinstance(obj, compat_http_cookies.Morsel): + obj = dict(obj, key=obj.key, value=obj.value) result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else next((v for k, v in obj.items() if casefold(k) == key), None)) @@ -6430,12 +6439,40 @@ def traverse_obj(obj, *paths, **kwargs): else: result = None if isinstance(key, (int, slice)): - if is_iterable_like(obj, compat_collections_abc.Sequence): + if is_iterable_like(obj, (compat_collections_abc.Sequence, compat_etree_Element)): branching = isinstance(key, slice) result = lookup_or_none(obj, key) elif _traverse_string: result = lookup_or_none(str(obj), key) + elif isinstance(obj, compat_etree_Element) and isinstance(key, str): + xpath, _, special = key.rpartition('/') + if not special.startswith('@') and not special.endswith('()'): + xpath = key + special = None + + # Allow abbreviations of relative paths, absolute paths error + if xpath.startswith('/'): + xpath = '.' + xpath + elif xpath and not xpath.startswith('./'): + xpath = './' + xpath + + def apply_specials(element): + if special is None: + return element + if special == '@': + return element.attrib + if special.startswith('@'): + return try_call(element.attrib.get, args=(special[1:],)) + if special == 'text()': + return element.text + raise SyntaxError('apply_specials is missing case for {0!r}'.format(special)) + + if xpath: + result = list(map(apply_specials, compat_etree_iterfind(obj, xpath))) + else: + result = apply_specials(obj) + return branching, result if branching else (result,) def lazy_last(iterable): @@ -6456,17 +6493,18 @@ def traverse_obj(obj, *paths, **kwargs): key = None for last, key in lazy_last(variadic(path, (str, bytes, dict, set))): - if _is_user_input and isinstance(key, str): - if key == ':': - key = Ellipsis - elif ':' in key: - key = slice(*map(int_or_none, key.split(':'))) - elif int_or_none(key) is not None: - key = int(key) - if not casesense and isinstance(key, str): key = compat_casefold(key) + if key in (any, all): + has_branched = False + filtered_objs = (obj for obj in objs if obj not in (None, {})) + if key is any: + objs = (next(filtered_objs, None),) + else: + objs = (list(filtered_objs),) + continue + if __debug__ and callable(key): # Verify function signature _try_bind_args(key, None, None) @@ -6505,9 +6543,9 @@ def traverse_obj(obj, *paths, **kwargs): return None if default is NO_DEFAULT else default -def T(x): - """ For use in yt-dl instead of {type} or set((type,)) """ - return set((x,)) +def T(*x): + """ For use in yt-dl instead of {type, ...} or set((type, ...)) """ + return set(x) def get_first(obj, keys, **kwargs): From 34484e49f5cd91a830f5459a5b673b7c05a22e24 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 28 May 2024 16:38:20 +0100 Subject: [PATCH 15/48] [compat] Improve compat_etree_iterfind for Py2.6 Adapted from https://raw.githubusercontent.com/python/cpython/2.7/Lib/xml/etree/ElementPath.py --- youtube_dl/compat.py | 214 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 211 insertions(+), 3 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index d5485c7e8..0371896ab 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2720,9 +2720,217 @@ if sys.version_info < (2, 7): xpath = xpath.encode('ascii') return xpath - def compat_etree_iterfind(element, match): - for from_ in element.findall(match): - yield from_ + # further code below based on CPython 2.7 source + 
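# A minimal sketch (assuming only stdlib ElementTree) of the behaviour the
# Py2.6 backport below is meant to reproduce; on Python >= 2.7,
# compat_etree_iterfind(el, match) simply delegates to el.iterfind(match):
import xml.etree.ElementTree as ET

root = ET.fromstring('<a><b>1</b><b>2</b><c/></a>')
assert [e.text for e in root.iterfind('b')] == ['1', '2']  # direct children by tag
assert [e.tag for e in root.iterfind('.//*')] == ['b', 'b', 'c']  # all descendants, document order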
import functools + + _xpath_tokenizer_re = re.compile(r'''(?x) + ( # (1) + '[^']*'|"[^"]*"| # quoted strings, or + ::|//?|\.\.|\(\)|[/.*:[\]()@=] # navigation specials + )| # or (2) + ((?:\{[^}]+\})?[^/[\]()@=\s]+)| # token: optional {ns}, no specials + \s+ # or white space + ''') + + def _xpath_tokenizer(pattern, namespaces=None): + for token in _xpath_tokenizer_re.findall(pattern): + tag = token[1] + if tag and tag[0] != "{" and ":" in tag: + try: + if not namespaces: + raise KeyError + prefix, uri = tag.split(":", 1) + yield token[0], "{%s}%s" % (namespaces[prefix], uri) + except KeyError: + raise SyntaxError("prefix %r not found in prefix map" % prefix) + else: + yield token + + def _get_parent_map(context): + parent_map = context.parent_map + if parent_map is None: + context.parent_map = parent_map = {} + for p in context.root.getiterator(): + for e in p: + parent_map[e] = p + return parent_map + + def _select(context, result, filter_fn=lambda *_: True): + for elem in result: + for e in elem: + if filter_fn(e, elem): + yield e + + def _prepare_child(next_, token): + tag = token[1] + return functools.partial(_select, filter_fn=lambda e, _: e.tag == tag) + + def _prepare_star(next_, token): + return _select + + def _prepare_self(next_, token): + return lambda _, result: (e for e in result) + + def _prepare_descendant(next_, token): + token = next(next_) + if token[0] == "*": + tag = "*" + elif not token[0]: + tag = token[1] + else: + raise SyntaxError("invalid descendant") + + def select(context, result): + for elem in result: + for e in elem.getiterator(tag): + if e is not elem: + yield e + return select + + def _prepare_parent(next_, token): + def select(context, result): + # FIXME: raise error if .. is applied at toplevel? + parent_map = _get_parent_map(context) + result_map = {} + for elem in result: + if elem in parent_map: + parent = parent_map[elem] + if parent not in result_map: + result_map[parent] = None + yield parent + return select + + def _prepare_predicate(next_, token): + signature = [] + predicate = [] + for token in next_: + if token[0] == "]": + break + if token[0] and token[0][:1] in "'\"": + token = "'", token[0][1:-1] + signature.append(token[0] or "-") + predicate.append(token[1]) + + def select(context, result, filter_fn=lambda _: True): + for elem in result: + if filter_fn(elem): + yield elem + + signature = "".join(signature) + # use signature to determine predicate type + if signature == "@-": + # [@attribute] predicate + key = predicate[1] + return functools.partial( + select, filter_fn=lambda el: el.get(key) is not None) + if signature == "@-='": + # [@attribute='value'] + key = predicate[1] + value = predicate[-1] + return functools.partial( + select, filter_fn=lambda el: el.get(key) == value) + if signature == "-" and not re.match(r"\d+$", predicate[0]): + # [tag] + tag = predicate[0] + return functools.partial( + select, filter_fn=lambda el: el.find(tag) is not None) + if signature == "-='" and not re.match(r"\d+$", predicate[0]): + # [tag='value'] + tag = predicate[0] + value = predicate[-1] + + def itertext(el): + for e in el.getiterator(): + e = e.text + if e: + yield e + + def select(context, result): + for elem in result: + for e in elem.findall(tag): + if "".join(itertext(e)) == value: + yield elem + break + return select + if signature == "-" or signature == "-()" or signature == "-()-": + # [index] or [last()] or [last()-index] + if signature == "-": + index = int(predicate[0]) - 1 + else: + if predicate[0] != "last": + raise 
SyntaxError("unsupported function") + if signature == "-()-": + try: + index = int(predicate[2]) - 1 + except ValueError: + raise SyntaxError("unsupported expression") + else: + index = -1 + + def select(context, result): + parent_map = _get_parent_map(context) + for elem in result: + try: + parent = parent_map[elem] + # FIXME: what if the selector is "*" ? + elems = list(parent.findall(elem.tag)) + if elems[index] is elem: + yield elem + except (IndexError, KeyError): + pass + return select + raise SyntaxError("invalid predicate") + + ops = { + "": _prepare_child, + "*": _prepare_star, + ".": _prepare_self, + "..": _prepare_parent, + "//": _prepare_descendant, + "[": _prepare_predicate, + } + + _cache = {} + + class _SelectorContext: + parent_map = None + + def __init__(self, root): + self.root = root + + ## + # Generate all matching objects. + + def compat_etree_iterfind(elem, path, namespaces=None): + # compile selector pattern + if path[-1:] == "/": + path = path + "*" # implicit all (FIXME: keep this?) + try: + selector = _cache[path] + except KeyError: + if len(_cache) > 100: + _cache.clear() + if path[:1] == "/": + raise SyntaxError("cannot use absolute path on element") + tokens = _xpath_tokenizer(path, namespaces) + selector = [] + for token in tokens: + if token[0] == "/": + continue + try: + selector.append(ops[token[0]](tokens, token)) + except StopIteration: + raise SyntaxError("invalid path") + _cache[path] = selector + # execute selector pattern + result = [elem] + context = _SelectorContext(elem) + for select in selector: + result = select(context, result) + return result + + # end of code based on CPython 2.7 source + else: compat_xpath = lambda xpath: xpath From eee9a247eb3ef876ce6d9f5e34275e46a2d06d10 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 28 May 2024 17:16:58 +0100 Subject: [PATCH 16/48] [utils] Split out traversal.py dummy and traversal tests --- test/test_traversal.py | 509 ++++++++++++++++++++++++++++++++++++++++ test/test_utils.py | 483 -------------------------------------- youtube_dl/traversal.py | 10 + 3 files changed, 519 insertions(+), 483 deletions(-) create mode 100644 test/test_traversal.py create mode 100644 youtube_dl/traversal.py diff --git a/test/test_traversal.py b/test/test_traversal.py new file mode 100644 index 000000000..00a428edb --- /dev/null +++ b/test/test_traversal.py @@ -0,0 +1,509 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import re + +from youtube_dl.traversal import ( + dict_get, + get_first, + T, + traverse_obj, +) +from youtube_dl.compat import ( + compat_etree_fromstring, + compat_http_cookies, + compat_str, +) +from youtube_dl.utils import ( + int_or_none, + str_or_none, +) + +_TEST_DATA = { + 100: 100, + 1.2: 1.2, + 'str': 'str', + 'None': None, + '...': Ellipsis, + 'urls': [ + {'index': 0, 'url': 'https://www.example.com/0'}, + {'index': 1, 'url': 'https://www.example.com/1'}, + ], + 'data': ( + {'index': 2}, + {'index': 3}, + ), + 'dict': {}, +} + + +if sys.version_info < (3, 0): + class _TestCase(unittest.TestCase): + + def assertCountEqual(self, *args, **kwargs): + return self.assertItemsEqual(*args, **kwargs) +else: + _TestCase = unittest.TestCase + + +class TestTraversal(_TestCase): + def assertMaybeCountEqual(self, *args, **kwargs): + if sys.version_info < (3, 7): + # random dict order + return 
self.assertCountEqual(*args, **kwargs) + else: + return self.assertEqual(*args, **kwargs) + + def test_traverse_obj(self): + # instant compat + str = compat_str + + # define a pukka Iterable + def iter_range(stop): + for from_ in range(stop): + yield from_ + + # Test base functionality + self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str', + msg='allow tuple path') + self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str', + msg='allow list path') + self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str', + msg='allow iterable path') + self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str', + msg='single items should be treated as a path') + self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA) + self.assertEqual(traverse_obj(_TEST_DATA, 100), 100) + self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2) + + # Test Ellipsis behavior + self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis), + (item for item in _TEST_DATA.values() if item not in (None, {})), + msg='`...` should give all non-discarded values') + self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(), + msg='`...` selection for dicts should select all values') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='nested `...` queries should work') + self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4), + msg='`...` query result should be flattened') + self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)), + msg='`...` should accept iterables') + + # Test function as key + self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), + [_TEST_DATA['urls']], + msg='function as query key should perform a filter based on (key, value)') + self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)), + msg='exceptions in the query function should be caught') + self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2], + msg='function key should accept iterables') + if __debug__: + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a: Ellipsis) + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis) + + # Test set as key (transformation/type, like `expected_type`) + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'], + msg='Function in set should be a transformation') + self.assertEqual(traverse_obj(_TEST_DATA, ('fail', T(lambda _: 'const'))), 'const', + msg='Function in set should always be called') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'], + msg='Type in set should be a type filter') + self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str, int))), [100, 'str'], + msg='Multiple types in set should be a type filter') + self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA, + msg='A single set should be wrapped into a path') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'], + msg='Transformation function should not raise') + self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))), + [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], + msg='Function in set should be a transformation') + if 
__debug__: + with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, set()) + with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, set((str.upper, str))) + + # Test `slice` as a key + _SLICE_DATA = [0, 1, 2, 3, 4] + self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None, + msg='slice on a dictionary should not throw') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2], + msg='slice key should apply slice to sequence') + + # Test alternative paths + self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str', + msg='multiple `paths` should be treated as alternative paths') + self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str', + msg='alternatives should exit early') + self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None, + msg='alternatives should return `default` if exhausted') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100, + msg='alternatives should track their own branching return') + self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']), + msg='alternatives on empty objects should search further') + + # Test branch and path nesting + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'], + msg='tuple as key should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'], + msg='list as key should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'], + msg='double nesting in path should be treated as paths') + self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1], + msg='do not fail early on branching') + self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='triple nesting in path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='ellipsis as branch path start gets flattened') + + # Test dictionary as key + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2}, + msg='dict key should result in a dict with the same keys') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}), + {0: 'https://www.example.com/0'}, + msg='dict key should allow paths') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}), + {0: ['https://www.example.com/0']}, + msg='tuple in dict path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}), + {0: ['https://www.example.com/0']}, + msg='double nesting in dict path should be treated as paths') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}), + {0: ['https://www.example.com/1', 'https://www.example.com/0']}, + msg='triple nesting in dict path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {}, + msg='remove 
`None` values when top level dict key fails')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+                         msg='use `default` if key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
+                         msg='remove empty values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
+                         msg='use `default` when dict key and a default')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
+                         msg='remove empty values when nested dict key fails')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
+                         msg='default to dict if pruned')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+                         msg='default to dict if pruned and default is given')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
+                         msg='use nested `default` when nested dict key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
+                         msg='remove key if branch in dict key not successful')
+
+        # Testing default parameter behavior
+        _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
+                         msg='default value should be `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
+                         msg='chained fails should result in default')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
+                         msg='should not short circuit on `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
+                         msg='invalid dict key should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
+                         msg='`None` is a deliberate sentinel and should become `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
+                         msg='`IndexError` should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
+                         msg='if branched but not successful return `default` if defined, not `[]`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
+                         msg='if branched but not successful return `default` even if `default` is `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
+                         msg='if branched but not successful return `[]`, not `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
+                         msg='if branched but object is empty return `[]`, not `default`')
+        self.assertEqual(traverse_obj(None, Ellipsis), [],
+                         msg='if branched but object is `None` return `[]`, not `default`')
+        self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
+                         msg='if branched but state is `None` return `[]`, not `default`')
+
+        branching_paths = [
+            ('fail', Ellipsis),
+            (Ellipsis, 'fail'),
+            100 * ('fail',) + (Ellipsis,),
+            (Ellipsis,) + 100 * ('fail',),
+        ]
+        for branching_path in branching_paths:
+            self.assertEqual(traverse_obj({}, branching_path), [],
+                             msg='if branched but state is `None`, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
+                             msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
+                             msg='if branching in last alternative and previous did match, return single value')
+            self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
+                             msg='if branching in first alternative and non-branching path does match, 
return single value') + self.assertEqual(traverse_obj({}, branching_path, 'fail'), None, + msg='if branching in first alternative and non-branching path does not match, return `default`') + + # Testing expected_type behavior + _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str), + 'str', msg='accept matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), + None, msg='reject non-matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)), + '0', msg='transform type using type function') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0), + None, msg='wrap expected_type function in try_call') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str), + ['str'], msg='eliminate items that expected_type fails on') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int), + {0: 100}, msg='type as expected_type should filter dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none), + {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values') + self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int), + 1, msg='expected_type should not filter non-final dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int), + {0: {0: 100}}, msg='expected_type should transform deep dict values') + self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)), + [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values') + self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int), + [4], msg='expected_type regression for type matching in tuple branching') + self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int), + [], msg='expected_type regression for type matching in dict result') + + # Test get_all behavior + _GET_ALL_DATA = {'key': [0, 1, 2]} + self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0, + msg='if not `get_all`, return only first matching value') + self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2], + msg='do not overflatten if not `get_all`') + + # Test casesense behavior + _CASESENSE_DATA = { + 'KeY': 'value0', + 0: { + 'KeY': 'value1', + 0: {'KeY': 'value2'}, + }, + # FULLWIDTH LATIN CAPITAL LETTER K + '\uff2bey': 'value3', + } + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None, + msg='dict keys should be case sensitive unless `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY', + casesense=False), 'value0', + msg='allow non matching key case if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K + casesense=False), 'value3', + msg='allow non matching Unicode key case if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)), + casesense=False), ['value1'], + msg='allow non matching key case in branch if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)), + casesense=False), ['value2'], + msg='allow non matching key case in branch path if `casesense`') + + # Test traverse_string behavior + _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2} + 
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None, + msg='do not traverse into string if not `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), + _traverse_string=True), 's', + msg='traverse into string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), + _traverse_string=True), '.', + msg='traverse into converted data if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis), + _traverse_string=True), 'str', + msg='`...` should result in string (same value) if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), + _traverse_string=True), 'sr', + msg='`slice` should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'), + _traverse_string=True), 'str', + msg='function should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), + _traverse_string=True), ['s', 'r'], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + + # Test re.Match as input obj + mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123') + self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None], + msg='`...` on a `re.Match` should give its `groups()`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'], + msg='function on a `re.Match` should give groupno, value starting at 0') + self.assertEqual(traverse_obj(mobj, 'group'), '3', + msg='str key on a `re.Match` should give group with that name') + self.assertEqual(traverse_obj(mobj, 2), '3', + msg='int key on a `re.Match` should give group with that name') + self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3', + msg='str key on a `re.Match` should respect casesense') + self.assertEqual(traverse_obj(mobj, 'fail'), None, + msg='failing str key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None, + msg='failing str key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, 8), None, + msg='failing int key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], + msg='function on a `re.Match` should give group name as well') + + # Test xml.etree.ElementTree.Element as input obj + etree = compat_etree_fromstring('''<?xml version="1.0"?> + <data> + <country name="Liechtenstein"> + <rank>1</rank> + <year>2008</year> + <gdppc>141100</gdppc> + <neighbor name="Austria" direction="E"/> + <neighbor name="Switzerland" direction="W"/> + </country> + <country name="Singapore"> + <rank>4</rank> + <year>2011</year> + <gdppc>59900</gdppc> + <neighbor name="Malaysia" direction="N"/> + </country> + <country name="Panama"> + <rank>68</rank> + <year>2011</year> + <gdppc>13600</gdppc> + <neighbor name="Costa Rica" direction="W"/> + <neighbor name="Colombia" direction="E"/> + </country> + </data>''') + 
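# The '@attr' and 'text()' shorthand asserted below, restated as a sketch in
# plain ElementTree calls against the `etree` built above (illustration only):
assert [e.get('name') for e in etree.iterfind('.//neighbor')] == \
    ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia']
assert [e.text for e in etree.iterfind('.//year')] == ['2008', '2011', '2011']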
self.assertEqual(traverse_obj(etree, ''), etree, + msg='empty str key should return the element itself') + self.assertEqual(traverse_obj(etree, 'country'), list(etree), + msg='str key should return all children with that tag name') + self.assertEqual(traverse_obj(etree, Ellipsis), list(etree), + msg='`...` as key should return all children') + self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]], + msg='function as key should get element as value') + self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]], + msg='function as key should get index as key') + self.assertEqual(traverse_obj(etree, 0), etree[0], + msg='int key should return the nth child') + self.assertEqual(traverse_obj(etree, './/neighbor/@name'), + ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'], + msg='`@<attribute>` at end of path should give that attribute') + self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None], + msg='`@<nonexistent>` at end of path should give `None`') + self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'}, + msg='`@` should give the full attribute dict') + self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'], + msg='`text()` at end of path should give the inner text') + self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'], + msg='full python xpath features should be supported') + self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein', + msg='special transformations should act on current element') + self.assertEqual(traverse_obj(etree, ('country', 0, Ellipsis, 'text()', T(int_or_none))), [1, 2008, 141100], + msg='special transformations should act on current element') + + def test_traversal_unbranching(self): + self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), all]), [100, 1.2], + msg='`all` should give all results as list') + self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), any]), 100, + msg='`any` should give the first result') + self.assertEqual(traverse_obj(_TEST_DATA, [100, all]), [100], + msg='`all` should give list if non branching') + self.assertEqual(traverse_obj(_TEST_DATA, [100, any]), 100, + msg='`any` should give single item if non branching') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]), [100], + msg='`all` should filter `None` and empty dict') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]), 100, + msg='`any` should filter `None` and empty dict') + self.assertEqual(traverse_obj(_TEST_DATA, [{ + 'all': [('dict', 'None', 100, 1.2), all], + 'any': [('dict', 'None', 100, 1.2), any], + }]), {'all': [100, 1.2], 'any': 100}, + msg='`all`/`any` should apply to each dict path separately') + self.assertEqual(traverse_obj(_TEST_DATA, [{ + 'all': [('dict', 'None', 100, 1.2), all], + 'any': [('dict', 'None', 100, 1.2), any], + }], get_all=False), {'all': [100, 1.2], 'any': 100}, + msg='`all`/`any` should apply to dict regardless of `get_all`') + self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, T(float)]), None, + msg='`all` should reset branching status') + self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, T(float)]), None, + msg='`any` should reset branching status') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, Ellipsis, T(float)]), [1.2], + msg='`all` should allow further branching') + self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 
'None', 'urls', 'data'), any, Ellipsis, 'index']), [0, 1], + msg='`any` should allow further branching') + + def test_traversal_morsel(self): + values = { + 'expires': 'a', + 'path': 'b', + 'comment': 'c', + 'domain': 'd', + 'max-age': 'e', + 'secure': 'f', + 'httponly': 'g', + 'version': 'h', + 'samesite': 'i', + } + # SameSite added in Py3.8, breaks .update for 3.5-3.7 + if sys.version_info < (3, 8): + del values['samesite'] + morsel = compat_http_cookies.Morsel() + morsel.set(str('item_key'), 'item_value', 'coded_value') + morsel.update(values) + values['key'] = str('item_key') + values['value'] = 'item_value' + values = dict((str(k), v) for k, v in values.items()) + # make test pass even without ordered dict + value_set = set(values.values()) + + for key, value in values.items(): + self.assertEqual(traverse_obj(morsel, key), value, + msg='Morsel should provide access to all values') + self.assertEqual(set(traverse_obj(morsel, Ellipsis)), value_set, + msg='`...` should yield all values') + self.assertEqual(set(traverse_obj(morsel, lambda k, v: True)), value_set, + msg='function key should yield all values') + self.assertIs(traverse_obj(morsel, [(None,), any]), morsel, + msg='Morsel should not be implicitly changed to dict on usage') + + def test_get_first(self): + self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam') + + def test_dict_get(self): + FALSE_VALUES = { + 'none': None, + 'false': False, + 'zero': 0, + 'empty_string': '', + 'empty_list': [], + } + d = FALSE_VALUES.copy() + d['a'] = 42 + self.assertEqual(dict_get(d, 'a'), 42) + self.assertEqual(dict_get(d, 'b'), None) + self.assertEqual(dict_get(d, 'b', 42), 42) + self.assertEqual(dict_get(d, ('a', )), 42) + self.assertEqual(dict_get(d, ('b', 'a', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', )), None) + self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) + for key, false_value in FALSE_VALUES.items(): + self.assertEqual(dict_get(d, ('b', 'c', key, )), None) + self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 179d21cf5..de7fe80b8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -14,7 +14,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import io import itertools import json -import re import xml.etree.ElementTree from youtube_dl.utils import ( @@ -28,7 +27,6 @@ from youtube_dl.utils import ( DateRange, detect_exe_version, determine_ext, - dict_get, encode_base_n, encode_compat_str, encodeFilename, @@ -44,7 +42,6 @@ from youtube_dl.utils import ( get_element_by_attribute, get_elements_by_class, get_elements_by_attribute, - get_first, InAdvancePagedList, int_or_none, intlist_to_bytes, @@ -84,14 +81,11 @@ from youtube_dl.utils import ( sanitized_Request, shell_quote, smuggle_url, - str_or_none, str_to_int, strip_jsonp, strip_or_none, subtitles_filename, - T, timeconvert, - traverse_obj, try_call, unescapeHTML, unified_strdate, @@ -123,7 +117,6 @@ from youtube_dl.compat import ( compat_chr, compat_etree_fromstring, compat_getenv, - compat_http_cookies, compat_os_name, compat_setenv, compat_str, @@ -1685,481 +1678,5 @@ Line 1 from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d') -class TestTraversal(unittest.TestCase): - str = compat_str - _TEST_DATA = { - 100: 100, - 1.2: 1.2, - 'str': 'str', - 'None': None, - '...': Ellipsis, - 'urls': [ - {'index': 0, 
'url': 'https://www.example.com/0'}, - {'index': 1, 'url': 'https://www.example.com/1'}, - ], - 'data': ( - {'index': 2}, - {'index': 3}, - ), - 'dict': {}, - } - - # yt-dlp shim - def assertCountEqual(self, expected, got, msg='count should be the same'): - return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg) - - def assertMaybeCountEqual(self, *args, **kwargs): - if sys.version_info < (3, 7): - # random dict order - return self.assertCountEqual(*args, **kwargs) - else: - return self.assertEqual(*args, **kwargs) - - def test_traverse_obj(self): - str = self.str - _TEST_DATA = self._TEST_DATA - - # define a pukka Iterable - def iter_range(stop): - for from_ in range(stop): - yield from_ - - # Test base functionality - self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str', - msg='allow tuple path') - self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str', - msg='allow list path') - self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str', - msg='allow iterable path') - self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str', - msg='single items should be treated as a path') - self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA) - self.assertEqual(traverse_obj(_TEST_DATA, 100), 100) - self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2) - - # Test Ellipsis behavior - self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis), - (item for item in _TEST_DATA.values() if item not in (None, {})), - msg='`...` should give all non-discarded values') - self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(), - msg='`...` selection for dicts should select all values') - self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')), - ['https://www.example.com/0', 'https://www.example.com/1'], - msg='nested `...` queries should work') - self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4), - msg='`...` query result should be flattened') - self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)), - msg='`...` should accept iterables') - - # Test function as key - self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), - [_TEST_DATA['urls']], - msg='function as query key should perform a filter based on (key, value)') - self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)), - msg='exceptions in the query function should be caught') - self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2], - msg='function key should accept iterables') - if __debug__: - with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): - traverse_obj(_TEST_DATA, lambda a: Ellipsis) - with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): - traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis) - - # Test set as key (transformation/type, like `expected_type`) - self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'], - msg='Function in set should be a transformation') - self.assertEqual(traverse_obj(_TEST_DATA, ('fail', T(lambda _: 'const'))), 'const', - msg='Function in set should always be called') - self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'], - msg='Type in set should be a type filter') - self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str, int))), [100, 'str'], - msg='Multiple types in set should be a type filter') - 
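# Sketch of the widened set key: after this patch series, T(*types) builds
# {type, ...}, which traverse_obj applies as an isinstance filter over the
# branched values, e.g.
#     traverse_obj({'a': 1, 'b': 'x'}, (Ellipsis, T(str, int))) -> [1, 'x']
# (value order follows dict iteration order, hence assertMaybeCountEqual above).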
self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA, - msg='A single set should be wrapped into a path') - self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'], - msg='Transformation function should not raise') - self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))), - [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], - msg='Function in set should be a transformation') - if __debug__: - with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): - traverse_obj(_TEST_DATA, set()) - with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): - traverse_obj(_TEST_DATA, set((str.upper, str))) - - # Test `slice` as a key - _SLICE_DATA = [0, 1, 2, 3, 4] - self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None, - msg='slice on a dictionary should not throw') - self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1], - msg='slice key should apply slice to sequence') - self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2], - msg='slice key should apply slice to sequence') - self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2], - msg='slice key should apply slice to sequence') - - # Test alternative paths - self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str', - msg='multiple `paths` should be treated as alternative paths') - self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str', - msg='alternatives should exit early') - self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None, - msg='alternatives should return `default` if exhausted') - self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100, - msg='alternatives should track their own branching return') - self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']), - msg='alternatives on empty objects should search further') - - # Test branch and path nesting - self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'], - msg='tuple as key should be treated as branches') - self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'], - msg='list as key should be treated as branches') - self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'], - msg='double nesting in path should be treated as paths') - self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1], - msg='do not fail early on branching') - self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))), - ['https://www.example.com/0', 'https://www.example.com/1'], - msg='triple nesting in path should be treated as branches') - self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))), - ['https://www.example.com/0', 'https://www.example.com/1'], - msg='ellipsis as branch path start gets flattened') - - # Test dictionary as key - self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2}, - msg='dict key should result in a dict with the same keys') - self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}), - {0: 'https://www.example.com/0'}, - msg='dict key should allow paths') - self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}), - {0: ['https://www.example.com/0']}, - msg='tuple in dict path should be treated as branches') - 
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}), - {0: ['https://www.example.com/0']}, - msg='double nesting in dict path should be treated as paths') - self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}), - {0: ['https://www.example.com/1', 'https://www.example.com/0']}, - msg='triple nesting in dict path should be treated as branches') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {}, - msg='remove `None` values when top level dict key fails') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, - msg='use `default` if key fails and `default`') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {}, - msg='remove empty values when dict key') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis}, - msg='use `default` when dict key and a default') - self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {}, - msg='remove empty values when nested dict key fails') - self.assertEqual(traverse_obj(None, {0: 'fail'}), {}, - msg='default to dict if pruned') - self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, - msg='default to dict if pruned and default is given') - self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}}, - msg='use nested `default` when nested dict key fails and `default`') - self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {}, - msg='remove key if branch in dict key not successful') - - # Testing default parameter behavior - _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []} - self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None, - msg='default value should be `None`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis, - msg='chained fails should result in default') - self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0, - msg='should not short cirquit on `None`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1, - msg='invalid dict key should result in `default`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1, - msg='`None` is a deliberate sentinel and should become `default`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None, - msg='`IndexError` should result in `default`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1, - msg='if branched but not successful return `default` if defined, not `[]`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None, - msg='if branched but not successful return `default` even if `default` is `None`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [], - msg='if branched but not successful return `[]`, not `default`') - self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [], - msg='if branched but object is empty return `[]`, not `default`') - self.assertEqual(traverse_obj(None, Ellipsis), [], - msg='if branched but object is `None` return `[]`, not `default`') - self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [], - msg='if branched but state is `None` return `[]`, not `default`') - - branching_paths = [ - ('fail', Ellipsis), - (Ellipsis, 'fail'), - 100 * ('fail',) + (Ellipsis,), - (Ellipsis,) + 100 * ('fail',), - ] - for branching_path in branching_paths: - self.assertEqual(traverse_obj({}, branching_path), [], - msg='if branched but state is `None`, 
return `[]` (not `default`)') - self.assertEqual(traverse_obj({}, 'fail', branching_path), [], - msg='if branching in last alternative and previous did not match, return `[]` (not `default`)') - self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x', - msg='if branching in last alternative and previous did match, return single value') - self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x', - msg='if branching in first alternative and non-branching path does match, return single value') - self.assertEqual(traverse_obj({}, branching_path, 'fail'), None, - msg='if branching in first alternative and non-branching path does not match, return `default`') - - # Testing expected_type behavior - _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str), - 'str', msg='accept matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), - None, msg='reject non-matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)), - '0', msg='transform type using type function') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0), - None, msg='wrap expected_type function in try_call') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str), - ['str'], msg='eliminate items that expected_type fails on') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int), - {0: 100}, msg='type as expected_type should filter dict values') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none), - {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values') - self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int), - 1, msg='expected_type should not filter non-final dict values') - self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int), - {0: {0: 100}}, msg='expected_type should transform deep dict values') - self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)), - [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values') - self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int), - [4], msg='expected_type regression for type matching in tuple branching') - self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int), - [], msg='expected_type regression for type matching in dict result') - - # Test get_all behavior - _GET_ALL_DATA = {'key': [0, 1, 2]} - self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0, - msg='if not `get_all`, return only first matching value') - self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2], - msg='do not overflatten if not `get_all`') - - # Test casesense behavior - _CASESENSE_DATA = { - 'KeY': 'value0', - 0: { - 'KeY': 'value1', - 0: {'KeY': 'value2'}, - }, - # FULLWIDTH LATIN CAPITAL LETTER K - '\uff2bey': 'value3', - } - self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None, - msg='dict keys should be case sensitive unless `casesense`') - self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY', - casesense=False), 'value0', - msg='allow non matching key case if `casesense`') - self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K - casesense=False), 'value3', - 
msg='allow non matching Unicode key case if `casesense`') - self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)), - casesense=False), ['value1'], - msg='allow non matching key case in branch if `casesense`') - self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)), - casesense=False), ['value2'], - msg='allow non matching key case in branch path if `casesense`') - - # Test traverse_string behavior - _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2} - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None, - msg='do not traverse into string if not `traverse_string`') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), - _traverse_string=True), 's', - msg='traverse into string if `traverse_string`') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), - _traverse_string=True), '.', - msg='traverse into converted data if `traverse_string`') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis), - _traverse_string=True), 'str', - msg='`...` should result in string (same value) if `traverse_string`') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), - _traverse_string=True), 'sr', - msg='`slice` should result in string if `traverse_string`') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'), - _traverse_string=True), 'str', - msg='function should result in string if `traverse_string`') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), - _traverse_string=True), ['s', 'r'], - msg='branching should result in list if `traverse_string`') - self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [], - msg='branching should result in list if `traverse_string`') - self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [], - msg='branching should result in list if `traverse_string`') - self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [], - msg='branching should result in list if `traverse_string`') - - # Test re.Match as input obj - mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123') - self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None], - msg='`...` on a `re.Match` should give its `groups()`') - self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'], - msg='function on a `re.Match` should give groupno, value starting at 0') - self.assertEqual(traverse_obj(mobj, 'group'), '3', - msg='str key on a `re.Match` should give group with that name') - self.assertEqual(traverse_obj(mobj, 2), '3', - msg='int key on a `re.Match` should give group with that name') - self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3', - msg='str key on a `re.Match` should respect casesense') - self.assertEqual(traverse_obj(mobj, 'fail'), None, - msg='failing str key on a `re.Match` should return `default`') - self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None, - msg='failing str key on a `re.Match` should return `default`') - self.assertEqual(traverse_obj(mobj, 8), None, - msg='failing int key on a `re.Match` should return `default`') - self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], - msg='function on a `re.Match` should give group name as well') - - # Test xml.etree.ElementTree.Element as input obj - etree = compat_etree_fromstring('''<?xml version="1.0"?> - <data> - <country name="Liechtenstein"> - <rank>1</rank> - <year>2008</year> - <gdppc>141100</gdppc> - 
<neighbor name="Austria" direction="E"/> - <neighbor name="Switzerland" direction="W"/> - </country> - <country name="Singapore"> - <rank>4</rank> - <year>2011</year> - <gdppc>59900</gdppc> - <neighbor name="Malaysia" direction="N"/> - </country> - <country name="Panama"> - <rank>68</rank> - <year>2011</year> - <gdppc>13600</gdppc> - <neighbor name="Costa Rica" direction="W"/> - <neighbor name="Colombia" direction="E"/> - </country> - </data>''') - self.assertEqual(traverse_obj(etree, ''), etree, - msg='empty str key should return the element itself') - self.assertEqual(traverse_obj(etree, 'country'), list(etree), - msg='str key should return all children with that tag name') - self.assertEqual(traverse_obj(etree, Ellipsis), list(etree), - msg='`...` as key should return all children') - self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]], - msg='function as key should get element as value') - self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]], - msg='function as key should get index as key') - self.assertEqual(traverse_obj(etree, 0), etree[0], - msg='int key should return the nth child') - self.assertEqual(traverse_obj(etree, './/neighbor/@name'), - ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'], - msg='`@<attribute>` at end of path should give that attribute') - self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None], - msg='`@<nonexistent>` at end of path should give `None`') - self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'}, - msg='`@` should give the full attribute dict') - self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'], - msg='`text()` at end of path should give the inner text') - self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'], - msg='full python xpath features should be supported') - self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein', - msg='special transformations should act on current element') - self.assertEqual(traverse_obj(etree, ('country', 0, Ellipsis, 'text()', T(int_or_none))), [1, 2008, 141100], - msg='special transformations should act on current element') - - def test_traversal_unbranching(self): - # str = self.str - _TEST_DATA = self._TEST_DATA - - self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), all]), [100, 1.2], - msg='`all` should give all results as list') - self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), any]), 100, - msg='`any` should give the first result') - self.assertEqual(traverse_obj(_TEST_DATA, [100, all]), [100], - msg='`all` should give list if non branching') - self.assertEqual(traverse_obj(_TEST_DATA, [100, any]), 100, - msg='`any` should give single item if non branching') - self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]), [100], - msg='`all` should filter `None` and empty dict') - self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]), 100, - msg='`any` should filter `None` and empty dict') - self.assertEqual(traverse_obj(_TEST_DATA, [{ - 'all': [('dict', 'None', 100, 1.2), all], - 'any': [('dict', 'None', 100, 1.2), any], - }]), {'all': [100, 1.2], 'any': 100}, - msg='`all`/`any` should apply to each dict path separately') - self.assertEqual(traverse_obj(_TEST_DATA, [{ - 'all': [('dict', 'None', 100, 1.2), all], - 'any': [('dict', 'None', 100, 1.2), any], - }], get_all=False), {'all': [100, 1.2], 'any': 100}, - msg='`all`/`any` should apply to dict 
regardless of `get_all`') - self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, T(float)]), None, - msg='`all` should reset branching status') - self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, T(float)]), None, - msg='`any` should reset branching status') - self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, Ellipsis, T(float)]), [1.2], - msg='`all` should allow further branching') - self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, Ellipsis, 'index']), [0, 1], - msg='`any` should allow further branching') - - def test_traversal_morsel(self): - values = { - 'expires': 'a', - 'path': 'b', - 'comment': 'c', - 'domain': 'd', - 'max-age': 'e', - 'secure': 'f', - 'httponly': 'g', - 'version': 'h', - 'samesite': 'i', - } - # SameSite added in Py3.8, breaks .update for 3.5-3.7 - if sys.version_info < (3, 8): - del values['samesite'] - morsel = compat_http_cookies.Morsel() - morsel.set(str('item_key'), 'item_value', 'coded_value') - morsel.update(values) - values['key'] = str('item_key') - values['value'] = 'item_value' - values = dict((str(k), v) for k, v in values.items()) - # make test pass even without ordered dict - value_set = set(values.values()) - - for key, value in values.items(): - self.assertEqual(traverse_obj(morsel, key), value, - msg='Morsel should provide access to all values') - self.assertEqual(set(traverse_obj(morsel, Ellipsis)), value_set, - msg='`...` should yield all values') - self.assertEqual(set(traverse_obj(morsel, lambda k, v: True)), value_set, - msg='function key should yield all values') - self.assertIs(traverse_obj(morsel, [(None,), any]), morsel, - msg='Morsel should not be implicitly changed to dict on usage') - - def test_get_first(self): - self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam') - - def test_dict_get(self): - FALSE_VALUES = { - 'none': None, - 'false': False, - 'zero': 0, - 'empty_string': '', - 'empty_list': [], - } - d = FALSE_VALUES.copy() - d['a'] = 42 - self.assertEqual(dict_get(d, 'a'), 42) - self.assertEqual(dict_get(d, 'b'), None) - self.assertEqual(dict_get(d, 'b', 42), 42) - self.assertEqual(dict_get(d, ('a', )), 42) - self.assertEqual(dict_get(d, ('b', 'a', )), 42) - self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) - self.assertEqual(dict_get(d, ('b', 'c', )), None) - self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) - for key, false_value in FALSE_VALUES.items(): - self.assertEqual(dict_get(d, ('b', 'c', key, )), None) - self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) - - if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/traversal.py b/youtube_dl/traversal.py new file mode 100644 index 000000000..834cfef7f --- /dev/null +++ b/youtube_dl/traversal.py @@ -0,0 +1,10 @@ +# coding: utf-8 + +# TODO: move these utils.fns here and move import to utils +# flake8: noqa +from .utils import ( + dict_get, + get_first, + T, + traverse_obj, +) From 768ccccd9b18bc48d129b12d14eace4ebb3655d8 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 28 May 2024 15:59:34 +0100 Subject: [PATCH 17/48] [compat] Avoid type comparison in `compat_ord` NB This isn't actually a compat fn; it should be utils.int_from_int_or_char --- youtube_dl/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0371896ab..ed1a33cf2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2970,7 +2970,7 @@ 
except (AssertionError, UnicodeEncodeError): def compat_ord(c): - if type(c) is int: + if isinstance(c, int): return c else: return ord(c) From 21924742f79ccbd62d16ef4120518c6a5da8614e Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Fri, 26 Apr 2024 18:57:44 +0100 Subject: [PATCH 18/48] [InfoExtractor] Misc yt-dlp back-ports, etc * add _yes_playlist() method * avoid crash using _NETRC_MACHINE * use _search_json() in _search_nextjs_data() * _search_nextjs_data() default is JSON, not text * test for above --- test/test_InfoExtractor.py | 3 ++ youtube_dl/extractor/common.py | 63 +++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index d55d6ad54..09100a1d6 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -153,6 +153,9 @@ class TestInfoExtractor(unittest.TestCase): ''' search = self.ie._search_nextjs_data(html, 'testID') self.assertEqual(search['props']['pageProps']['video']['id'], 'testid') + search = self.ie._search_nextjs_data( + 'no next.js data here, move along', 'testID', default={'status': 0}) + self.assertEqual(search['status'], 0) def test_search_nuxt_data(self): html = ''' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7fae9e57b..b10e84416 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1169,10 +1169,10 @@ class InfoExtractor(object): def _get_netrc_login_info(self, netrc_machine=None): username = None password = None - netrc_machine = netrc_machine or self._NETRC_MACHINE if self._downloader.params.get('usenetrc', False): try: + netrc_machine = netrc_machine or self._NETRC_MACHINE info = netrc.netrc().authenticators(netrc_machine) if info is not None: username = info[0] @@ -1180,7 +1180,7 @@ class InfoExtractor(object): else: raise netrc.NetrcParseError( 'No authenticators for %s' % netrc_machine) - except (IOError, netrc.NetrcParseError) as err: + except (AttributeError, IOError, netrc.NetrcParseError) as err: self._downloader.report_warning( 'parsing .netrc: %s' % error_to_compat_str(err)) @@ -1490,14 +1490,18 @@ class InfoExtractor(object): return dict((k, v) for k, v in info.items() if v is not None) def _search_nextjs_data(self, webpage, video_id, **kw): - nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal')) - kw.pop('transform_source', None) - next_data = self._search_regex( - r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''', - webpage, 'next.js data', group='nd', **kw) - if not next_data: - return {} - return self._parse_json(next_data, video_id, **nkw) + # ..., *, transform_source=None, fatal=True, default=NO_DEFAULT + + # TODO: remove this backward compat + default = kw.get('default', NO_DEFAULT) + if default == '{}': + kw['default'] = {} + kw = compat_kwargs(kw) + + return self._search_json( + r'''<script\s[^>]*?\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>''', + webpage, 'next.js data', video_id, end_pattern='</script>', + **kw) def _search_nuxt_data(self, webpage, video_id, *args, **kwargs): """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" @@ -3296,12 +3300,16 @@ class InfoExtractor(object): return ret @classmethod - def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2): - """ Merge two subtitle dictionaries, language by language. 
""" - ret = dict(subtitle_dict1) - for lang in subtitle_dict2: - ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang]) - return ret + def _merge_subtitles(cls, subtitle_dict1, *subtitle_dicts, **kwargs): + """ Merge subtitle dictionaries, language by language. """ + + # ..., * , target=None + target = kwargs.get('target') or dict(subtitle_dict1) + + for subtitle_dict in subtitle_dicts: + for lang in subtitle_dict: + target[lang] = cls._merge_subtitle_items(target.get(lang, []), subtitle_dict[lang]) + return target def extract_automatic_captions(self, *args, **kwargs): if (self._downloader.params.get('writeautomaticsub', False) @@ -3334,6 +3342,29 @@ class InfoExtractor(object): def _generic_title(self, url): return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + def _yes_playlist(self, playlist_id, video_id, *args, **kwargs): + # smuggled_data=None, *, playlist_label='playlist', video_label='video' + smuggled_data = args[0] if len(args) == 1 else kwargs.get('smuggled_data') + playlist_label = kwargs.get('playlist_label', 'playlist') + video_label = kwargs.get('video_label', 'video') + + if not playlist_id or not video_id: + return not video_id + + no_playlist = (smuggled_data or {}).get('force_noplaylist') + if no_playlist is not None: + return not no_playlist + + video_id = '' if video_id is True else ' ' + video_id + noplaylist = self.get_param('noplaylist') + self.to_screen( + 'Downloading just the {0}{1} because of --no-playlist'.format(video_label, video_id) + if noplaylist else + 'Downloading {0}{1} - add --no-playlist to download just the {2}{3}'.format( + playlist_label, '' if playlist_id is True else ' ' + playlist_id, + video_label, video_id)) + return not noplaylist + class SearchInfoExtractor(InfoExtractor): """ From 88bd8b9f87f6f4956f11d32f3a7f23f20283357b Mon Sep 17 00:00:00 2001 From: kmnx <max.hampel@gmail.com> Date: Tue, 11 Jun 2024 13:38:24 +0200 Subject: [PATCH 19/48] [mixcloud] updated mixcloud API server address (#32557) * updated mixcloud API server address * fix tests * etc --------- Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/mixcloud.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 69319857d..2b5e2c15c 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -10,7 +11,7 @@ from ..compat import ( compat_ord, compat_str, compat_urllib_parse_unquote, - compat_zip + compat_zip as zip, ) from ..utils import ( int_or_none, @@ -24,7 +25,7 @@ class MixcloudBaseIE(InfoExtractor): def _call_api(self, object_type, object_fields, display_id, username, slug=None): lookup_key = object_type + 'Lookup' return self._download_json( - 'https://www.mixcloud.com/graphql', display_id, query={ + 'https://app.mixcloud.com/graphql', display_id, query={ 'query': '''{ %s(lookup: {username: "%s"%s}) { %s @@ -44,7 +45,7 @@ class MixcloudIE(MixcloudBaseIE): 'ext': 'm4a', 'title': 'Cryptkeeper', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', - 'uploader': 'Daniel Holbach', + 'uploader': 'dholbach', # was: 'Daniel Holbach', 'uploader_id': 'dholbach', 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, @@ -57,7 +58,7 @@ class MixcloudIE(MixcloudBaseIE): 'id': 
'gillespeterson_caribou-7-inch-vinyl-mix-chat', 'ext': 'mp3', 'title': 'Caribou 7 inch Vinyl Mix & Chat', - 'description': 'md5:2b8aec6adce69f9d41724647c65875e8', + 'description': r're:Last week Dan Snaith aka Caribou swung by the Brownswood.{136}', 'uploader': 'Gilles Peterson Worldwide', 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', @@ -65,6 +66,23 @@ class MixcloudIE(MixcloudBaseIE): 'timestamp': 1422987057, 'upload_date': '20150203', }, + 'params': { + 'skip_download': '404 not found', + }, + }, { + 'url': 'https://www.mixcloud.com/gillespeterson/carnival-m%C3%BAsica-popular-brasileira-mix/', + 'info_dict': { + 'id': 'gillespeterson_carnival-música-popular-brasileira-mix', + 'ext': 'm4a', + 'title': 'Carnival Música Popular Brasileira Mix', + 'description': r're:Gilles was recently in Brazil to play at Boiler Room.{208}', + 'timestamp': 1454347174, + 'upload_date': '20160201', + 'uploader': 'Gilles Peterson Worldwide', + 'uploader_id': 'gillespeterson', + 'thumbnail': 're:https?://.*', + 'view_count': int, + }, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', 'only_matching': True, @@ -76,10 +94,10 @@ class MixcloudIE(MixcloudBaseIE): """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.""" return ''.join([ compat_chr(compat_ord(ch) ^ compat_ord(k)) - for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) + for ch, k in zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): - username, slug = re.match(self._VALID_URL, url).groups() + username, slug = self._match_valid_url(url).groups() username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug) track_id = '%s_%s' % (username, slug) From b4ff08bd2d12b6c91f4d8c83a7820fc6db31033d Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sat, 1 Jun 2024 03:23:37 +0100 Subject: [PATCH 20/48] [core] Safer handling of nested playlist data --- youtube_dl/YoutubeDL.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6f2aba5ac..a2b45859c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1039,8 +1039,8 @@ class YoutubeDL(object): elif result_type in ('playlist', 'multi_video'): # Protect from infinite recursion due to recursively nested playlists # (see https://github.com/ytdl-org/youtube-dl/issues/27833) - webpage_url = ie_result['webpage_url'] - if webpage_url in self._playlist_urls: + webpage_url = ie_result.get('webpage_url') # not all pl/mv have this + if webpage_url and webpage_url in self._playlist_urls: self.to_screen( '[download] Skipping already downloaded playlist: %s' % ie_result.get('title') or ie_result.get('id')) @@ -1048,6 +1048,10 @@ class YoutubeDL(object): self._playlist_level += 1 self._playlist_urls.add(webpage_url) + new_result = dict((k, v) for k, v in extra_info.items() if k not in ie_result) + if new_result: + new_result.update(ie_result) + ie_result = new_result try: return self.__process_playlist(ie_result, download) finally: From 50f6c5668ac28a435a5c09e3d5ee7c13a50999f3 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sat, 1 Jun 2024 03:25:06 +0100 Subject: [PATCH 21/48] [core] Re-factor with `_fill_common_fields()` as used in yt-dlp --- youtube_dl/YoutubeDL.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a2b45859c..dad44435f 100755 --- 
a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1597,6 +1597,28 @@ class YoutubeDL(object): self.cookiejar.add_cookie_header(pr) return pr.get_header('Cookie') + def _fill_common_fields(self, info_dict, final=True): + + for ts_key, date_key in ( + ('timestamp', 'upload_date'), + ('release_timestamp', 'release_date'), + ): + if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: + # Working around out-of-range timestamp values (e.g. negative ones on Windows, + # see http://bugs.python.org/issue1646728) + try: + upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) + info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d')) + except (ValueError, OverflowError, OSError): + pass + + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. + if final: + for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -1664,24 +1686,7 @@ class YoutubeDL(object): if 'display_id' not in info_dict and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] - for ts_key, date_key in ( - ('timestamp', 'upload_date'), - ('release_timestamp', 'release_date'), - ): - if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: - # Working around out-of-range timestamp values (e.g. negative ones on Windows, - # see http://bugs.python.org/issue1646728) - try: - upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) - info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d')) - except (ValueError, OverflowError, OSError): - pass - - # Auto generate title fields corresponding to the *_number fields when missing - # in order to always have clean titles. This is very common for TV series. 
- for field in ('chapter', 'season', 'episode'): - if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): - info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + self._fill_common_fields(info_dict) for cc_kind in ('subtitles', 'automatic_captions'): cc = info_dict.get(cc_kind) From 3bde6a5752591f824096469fb9617be9d470df2c Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Jun 2024 01:20:42 +0100 Subject: [PATCH 22/48] [test] Improve download test * skip reason can't be unicode in Py2 * remove duplicate assert...Equal functions --- test/helper.py | 31 +++++-------------------------- test/test_download.py | 22 +++++++++++++--------- 2 files changed, 18 insertions(+), 35 deletions(-) diff --git a/test/helper.py b/test/helper.py index 5b7e3dfe2..6f2129eff 100644 --- a/test/helper.py +++ b/test/helper.py @@ -5,9 +5,9 @@ import hashlib import json import os.path import re -import types import ssl import sys +import types import unittest import youtube_dl.extractor @@ -181,18 +181,18 @@ def expect_value(self, got, expected, field): op, _, expected_num = expected.partition(':') expected_num = int(expected_num) if op == 'mincount': - assert_func = assertGreaterEqual + assert_func = self.assertGreaterEqual msg_tmpl = 'Expected %d items in field %s, but only got %d' elif op == 'maxcount': - assert_func = assertLessEqual + assert_func = self.assertLessEqual msg_tmpl = 'Expected maximum %d items in field %s, but got %d' elif op == 'count': - assert_func = assertEqual + assert_func = self.assertEqual msg_tmpl = 'Expected exactly %d items in field %s, but got %d' else: assert False assert_func( - self, len(got), expected_num, + len(got), expected_num, msg_tmpl % (expected_num, field, len(got))) return self.assertEqual( @@ -262,27 +262,6 @@ def assertRegexpMatches(self, text, regexp, msg=None): self.assertTrue(m, msg) -def assertGreaterEqual(self, got, expected, msg=None): - if not (got >= expected): - if msg is None: - msg = '%r not greater than or equal to %r' % (got, expected) - self.assertTrue(got >= expected, msg) - - -def assertLessEqual(self, got, expected, msg=None): - if not (got <= expected): - if msg is None: - msg = '%r not less than or equal to %r' % (got, expected) - self.assertTrue(got <= expected, msg) - - -def assertEqual(self, got, expected, msg=None): - if not (got == expected): - if msg is None: - msg = '%r not equal to %r' % (got, expected) - self.assertTrue(got == expected, msg) - - def expect_warnings(ydl, warnings_re): real_warning = ydl.report_warning diff --git a/test/test_download.py b/test/test_download.py index df8b370cf..f7d6a23bc 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -9,8 +9,6 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( - assertGreaterEqual, - assertLessEqual, expect_warnings, get_params, gettestcases, @@ -36,12 +34,20 @@ from youtube_dl.utils import ( ExtractorError, error_to_compat_str, format_bytes, + IDENTITY, + preferredencoding, UnavailableVideoError, ) from youtube_dl.extractor import get_info_extractor RETRIES = 3 +# Some unittest APIs require actual str +if not isinstance('TEST', str): + _encode_str = lambda s: s.encode(preferredencoding()) +else: + _encode_str = IDENTITY + class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): @@ -102,7 +108,7 @@ def generator(test_case, tname): def print_skipping(reason): print('Skipping %s: %s' % (test_case['name'], reason)) - 
self.skipTest(reason) + self.skipTest(_encode_str(reason)) if not ie.working(): print_skipping('IE marked as not _WORKING') @@ -187,16 +193,14 @@ def generator(test_case, tname): expect_info_dict(self, res_dict, test_case.get('info_dict', {})) if 'playlist_mincount' in test_case: - assertGreaterEqual( - self, + self.assertGreaterEqual( len(res_dict['entries']), test_case['playlist_mincount'], 'Expected at least %d in playlist %s, but got only %d' % ( test_case['playlist_mincount'], test_case['url'], len(res_dict['entries']))) if 'playlist_maxcount' in test_case: - assertLessEqual( - self, + self.assertLessEqual( len(res_dict['entries']), test_case['playlist_maxcount'], 'Expected at most %d in playlist %s, but got %d' % ( @@ -243,8 +247,8 @@ def generator(test_case, tname): if params.get('test'): expected_minsize = max(expected_minsize, 10000) got_fsize = os.path.getsize(tc_filename) - assertGreaterEqual( - self, got_fsize, expected_minsize, + self.assertGreaterEqual( + got_fsize, expected_minsize, 'Expected %s to be at least %s, but it\'s only %s ' % (tc_filename, format_bytes(expected_minsize), format_bytes(got_fsize))) From d95c0d203fa74c806a806806f7c5286ca532b936 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sat, 1 Jun 2024 12:56:41 +0100 Subject: [PATCH 23/48] [ORF] Support on.orf.at, replacing `ORFTVthekIE` * add `ORFONIE`, back-porting yt-dlp PR https://github.com/yt-dlp/yt-dlp/pull/9113 and friends: thx HobbyistDev, TuxCoder, seproDev * re-factor to support livestreams via new `ORFONliveIE` --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/orf.py | 434 ++++++++++++++++++----------- 2 files changed, 274 insertions(+), 163 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 03d035a27..8bc5dd6d4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -898,7 +898,8 @@ from .ooyala import ( ) from .ora import OraTVIE from .orf import ( - ORFTVthekIE, + ORFONIE, + ORFONLiveIE, ORFFM4IE, ORFFM4StoryIE, ORFOE1IE, diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 8d537d7ae..3854911bd 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -1,184 +1,30 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 +import functools import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( clean_html, determine_ext, float_or_none, - HEADRequest, int_or_none, + merge_dicts, orderedSet, + parse_age_limit, + parse_iso8601, remove_end, str_or_none, strip_jsonp, - unescapeHTML, + txt_or_none, unified_strdate, url_or_none, ) +from ..traversal import T, traverse_obj - -class ORFTVthekIE(InfoExtractor): - IE_NAME = 'orf:tvthek' - IE_DESC = 'ORF TVthek' - _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)' - - _TESTS = [{ - 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', - 'playlist': [{ - 'md5': '2942210346ed779588f428a92db88712', - 'info_dict': { - 'id': '8896777', - 'ext': 'mp4', - 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', - 'description': 'md5:c1272f0245537812d4e36419c207b67d', - 'duration': 2668, - 'upload_date': '20141208', - }, - }], - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', - 'info_dict': { - 'id': '7982259', - 'ext': 'mp4', - 'title': 'Best of Ingrid Thurnher', - 
'upload_date': '20140527', - 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', - }, - 'params': { - 'skip_download': True, # rtsp downloads - }, - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141', - 'only_matching': True, - }, { - 'url': 'http://tvthek.orf.at/profile/Universum/35429', - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - data_jsb = self._parse_json( - self._search_regex( - r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2', - webpage, 'playlist', group='json'), - playlist_id, transform_source=unescapeHTML)['playlist']['videos'] - - entries = [] - for sd in data_jsb: - video_id, title = sd.get('id'), sd.get('title') - if not video_id or not title: - continue - video_id = compat_str(video_id) - formats = [] - for fd in sd['sources']: - src = url_or_none(fd.get('src')) - if not src: - continue - format_id_list = [] - for key in ('delivery', 'quality', 'quality_string'): - value = fd.get(key) - if value: - format_id_list.append(value) - format_id = '-'.join(format_id_list) - ext = determine_ext(src) - if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id=format_id, fatal=False) - if any('/geoprotection' in f['url'] for f in m3u8_formats): - self.raise_geo_restricted() - formats.extend(m3u8_formats) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - src, video_id, f4m_id=format_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id=format_id, fatal=False)) - else: - formats.append({ - 'format_id': format_id, - 'url': src, - 'protocol': fd.get('protocol'), - }) - - # Check for geoblocking. - # There is a property is_geoprotection, but that's always false - geo_str = sd.get('geoprotection_string') - if geo_str: - try: - http_url = next( - f['url'] - for f in formats - if re.match(r'^https?://.*\.mp4$', f['url'])) - except StopIteration: - pass - else: - req = HEADRequest(http_url) - self._request_webpage( - req, video_id, - note='Testing for geoblocking', - errnote=(( - 'This video seems to be blocked outside of %s. 
' - 'You may want to try the streaming-* formats.') - % geo_str), - fatal=False) - - self._check_formats(formats, video_id) - self._sort_formats(formats) - - subtitles = {} - for sub in sd.get('subtitles', []): - sub_src = sub.get('src') - if not sub_src: - continue - subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({ - 'url': sub_src, - }) - - upload_date = unified_strdate(sd.get('created_date')) - - thumbnails = [] - preview = sd.get('preview_image_url') - if preview: - thumbnails.append({ - 'id': 'preview', - 'url': preview, - 'preference': 0, - }) - image = sd.get('image_full_url') - if not image and len(data_jsb) == 1: - image = self._og_search_thumbnail(webpage) - if image: - thumbnails.append({ - 'id': 'full', - 'url': image, - 'preference': 1, - }) - - entries.append({ - '_type': 'video', - 'id': video_id, - 'title': title, - 'formats': formats, - 'subtitles': subtitles, - 'description': sd.get('description'), - 'duration': int_or_none(sd.get('duration_in_seconds')), - 'upload_date': upload_date, - 'thumbnails': thumbnails, - }) - - return { - '_type': 'playlist', - 'entries': entries, - 'id': playlist_id, - } +k_float_or_none = functools.partial(float_or_none, scale=1000) class ORFRadioIE(InfoExtractor): @@ -401,6 +247,7 @@ class ORFOE1IE(ORFRadioIE): class ORFIPTVIE(InfoExtractor): IE_NAME = 'orf:iptv' IE_DESC = 'iptv.ORF.at' + _WORKING = False # URLs redirect to orf.at/ _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' _TEST = { @@ -590,3 +437,266 @@ class ORFFM4StoryIE(InfoExtractor): }) return self.playlist_result(entries) + + +class ORFONBase(InfoExtractor): + _ENC_PFX = '3dSlfek03nsLKdj4Jsd' + _API_PATH = 'episode' + + def _call_api(self, video_id, **kwargs): + encrypted_id = base64.b64encode('{0}{1}'.format( + self._ENC_PFX, video_id).encode('utf-8')).decode('ascii') + return self._download_json( + 'https://api-tvthek.orf.at/api/v4.3/public/{0}/encrypted/{1}'.format( + self._API_PATH, encrypted_id), + video_id, **kwargs) + + @classmethod + def _parse_metadata(cls, api_json): + return traverse_obj(api_json, { + 'id': ('id', T(int), T(txt_or_none)), + 'age_limit': ('age_classification', T(parse_age_limit)), + 'duration': ((('exact_duration', T(k_float_or_none)), + ('duration_second', T(float_or_none))),), + 'title': (('title', 'headline'), T(txt_or_none)), + 'description': (('description', 'teaser_text'), T(txt_or_none)), + # 'media_type': ('video_type', T(txt_or_none)), + 'thumbnail': ('_embedded', 'image', 'public_urls', 'highlight_teaser', 'url', T(url_or_none)), + 'timestamp': (('date', 'episode_date'), T(parse_iso8601)), + 'release_timestamp': ('release_date', T(parse_iso8601)), + # 'modified_timestamp': ('updated_at', T(parse_iso8601)), + }, get_all=False) + + def _extract_video(self, video_id, segment_id): + # Not a segmented episode: return single video + # Segmented episode without valid segment id: return entire playlist + # Segmented episode with valid segment id and yes-playlist: return entire playlist + # Segmented episode with valid segment id and no-playlist: return single video corresponding to segment id + # If a multi_video playlist would be returned, but an unsegmented source exists, that source is chosen instead. 
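+        # Example, as exercised by the ORFONIE tests below: for the segmented
+        # episode https://on.orf.at/video/14226549/15639808/jugendbande-einbrueche-aus-langeweile
+        # --no-playlist selects just segment 15639808, while the default is to
+        # return the whole episode as a multi_video playlist.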
+ + api_json = self._call_api(video_id) + + if traverse_obj(api_json, 'is_drm_protected'): + self.report_drm(video_id) + + # updates formats, subtitles + def extract_sources(src_json, video_id): + for manifest_type in traverse_obj(src_json, ('sources', T(dict.keys), Ellipsis)): + for manifest_url in traverse_obj(src_json, ('sources', manifest_type, Ellipsis, 'src', T(url_or_none))): + if manifest_type == 'hls': + fmts, subs = self._extract_m3u8_formats( + manifest_url, video_id, fatal=False, m3u8_id='hls', + ext='mp4', entry_protocol='m3u8_native'), {} + for f in fmts: + if '_vo.' in f['url']: + f['acodec'] = 'none' + elif manifest_type == 'dash': + fmts, subs = self._extract_mpd_formats_and_subtitles( + manifest_url, video_id, fatal=False, mpd_id='dash') + else: + continue + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + formats, subtitles = [], {} + if segment_id is None: + extract_sources(api_json, video_id) + if not formats: + segments = traverse_obj(api_json, ( + '_embedded', 'segments', lambda _, v: v['id'])) + if len(segments) > 1 and segment_id is not None: + if not self._yes_playlist(video_id, segment_id, playlist_label='collection', video_label='segment'): + segments = [next(s for s in segments if txt_or_none(s['id']) == segment_id)] + + entries = [] + for seg in segments: + formats, subtitles = [], {} + extract_sources(seg, segment_id) + self._sort_formats(formats) + entries.append(merge_dicts({ + 'formats': formats, + 'subtitles': subtitles, + }, self._parse_metadata(seg), rev=True)) + result = merge_dicts( + {'_type': 'multi_video' if len(entries) > 1 else 'playlist'}, + self._parse_metadata(api_json), + self.playlist_result(entries, video_id)) + # not yet processed in core for playlist/multi + self._downloader._fill_common_fields(result) + return result + else: + self._sort_formats(formats) + + for sub_url in traverse_obj(api_json, ( + '_embedded', 'subtitle', + ('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url'), + T(url_or_none))): + self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles) + + return merge_dicts({ + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + # '_old_archive_ids': [self._downloader._make_archive_id({'ie_key': 'ORFTVthek', 'id': video_id})], + }, self._parse_metadata(api_json), rev=True) + + def _real_extract(self, url): + video_id, segment_id = self._match_valid_url(url).group('id', 'segment') + webpage = self._download_webpage(url, video_id) + + # ORF doesn't like 410 or 404 + if self._search_regex(r'<div\b[^>]*>\s*(Nicht mehr verfügbar)\s*</div>', webpage, 'Availability', default=False): + raise ExtractorError('Content is no longer available', expected=True, video_id=video_id) + + return merge_dicts({ + 'id': video_id, + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), + 'description': self._html_search_meta( + ['description', 'og:description', 'twitter:description'], webpage, default=None), + }, self._search_json_ld(webpage, video_id, default={}), + self._extract_video(video_id, segment_id), + rev=True) + + +class ORFONIE(ORFONBase): + IE_NAME = 'orf:on' + _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(?:/(?P<segment>\d+))?' 
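+    # NB: the pattern is not anchored at the end, so any trailing title slug
+    # (as in the test URLs below) is simply ignored.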
+ _TESTS = [{ + 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', + 'info_dict': { + 'id': '14210000', + 'ext': 'mp4', + 'duration': 2651.08, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg', + 'title': 'School of Champions (4/8)', + 'description': r're:(?s)Luca hat sein ganzes Leben in den Bergen Südtirols verbracht und ist bei seiner Mutter aufgewachsen, .{1029} Leo$', + # 'media_type': 'episode', + 'timestamp': 1706558922, + 'upload_date': '20240129', + 'release_timestamp': 1706472362, + 'release_date': '20240128', + # 'modified_timestamp': 1712756663, + # 'modified_date': '20240410', + # '_old_archive_ids': ['orftvthek 14210000'], + }, + 'params': { + 'format': 'bestvideo', + }, + }, { + 'url': 'https://on.orf.at/video/3220355', + 'md5': '925a93b2b9a37da5c9b979d7cf71aa2e', + 'info_dict': { + 'id': '3220355', + 'ext': 'mp4', + 'duration': 445.04, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png', + 'title': '50 Jahre Burgenland: Der Festumzug', + 'description': r're:(?s)Aus allen Landesteilen zogen festlich geschmückte Wagen und Musikkapellen .{270} Jenakowitsch$', + # 'media_type': 'episode', + 'timestamp': 52916400, + 'upload_date': '19710905', + 'release_timestamp': 52916400, + 'release_date': '19710905', + # 'modified_timestamp': 1498536049, + # 'modified_date': '20170627', + # '_old_archive_ids': ['orftvthek 3220355'], + }, + }, { + # Video with multiple segments selecting the second segment + 'url': 'https://on.orf.at/video/14226549/15639808/jugendbande-einbrueche-aus-langeweile', + 'md5': 'fc151bba8c05ea77ab5693617e4a33d3', + 'info_dict': { + 'id': '15639808', + 'ext': 'mp4', + 'duration': 97.707, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0175/43/thumb_17442704_segments_highlight_teaser.jpg', + 'title': 'Jugendbande: Einbrüche aus Langeweile', + 'description': r're:Jugendbande: Einbrüche aus Langeweile \| Neuer Kinder- und .{259} Wanda$', + # 'media_type': 'segment', + 'timestamp': 1715792400, + 'upload_date': '20240515', + # 'modified_timestamp': 1715794394, + # 'modified_date': '20240515', + # '_old_archive_ids': ['orftvthek 15639808'], + }, + 'params': { + 'noplaylist': True, + 'format': 'bestvideo', + }, + }, { + # Video with multiple segments and no combined version + 'url': 'https://on.orf.at/video/14227864/formel-1-grosser-preis-von-monaco-2024', + 'info_dict': { + '_type': 'multi_video', + 'id': '14227864', + 'duration': 18410.52, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/04/thumb_17503881_segments_highlight_teaser.jpg', + 'title': 'Formel 1: Großer Preis von Monaco 2024', + 'description': 'md5:aeeb010710ccf70ce28ccb4482243d4f', + # 'media_type': 'episode', + 'timestamp': 1716721200, + 'upload_date': '20240526', + 'release_timestamp': 1716721802, + 'release_date': '20240526', + # 'modified_timestamp': 1716884702, + # 'modified_date': '20240528', + }, + 'playlist_count': 42, + 'skip': 'Gone: Nicht mehr verfügbar', + }, { + # Video with multiple segments, but with combined version + 'url': 'https://on.orf.at/video/14228172', + 'info_dict': { + 'id': '14228172', + 'ext': 'mp4', + 'duration': 3294.878, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/29/thumb_17528242_segments_highlight_teaser.jpg', + 'title': 'Willkommen Österreich mit Stermann & Grissemann', + 'description': r're:Zum Saisonfinale freuen sich die urlaubsreifen Gastgeber Stermann und .{1863} Geschichten\.$', + # 
'media_type': 'episode', + 'timestamp': 1716926584, + 'upload_date': '20240528', + 'release_timestamp': 1716919202, + 'release_date': '20240528', + # 'modified_timestamp': 1716968045, + # 'modified_date': '20240529', + # '_old_archive_ids': ['orftvthek 14228172'], + }, + 'params': { + 'format': 'bestvideo', + }, + 'skip': 'Gone: Nicht mehr verfügbar', + }] + + +class ORFONLiveIE(ORFONBase): + _ENC_PFX = '8876324jshjd7293ktd' + _API_PATH = 'livestream' + _VALID_URL = r'https?://on\.orf\.at/livestream/(?P<id>\d+)(?:/(?P<segment>\d+))?' + _TESTS = [{ + 'url': 'https://on.orf.at/livestream/14320204/pressekonferenz-neos-zu-aktuellen-entwicklungen', + 'info_dict': { + 'id': '14320204', + 'ext': 'mp4', + 'title': 'Pressekonferenz: Neos zu aktuellen Entwicklungen', + 'description': r're:(?s)Neos-Chefin Beate Meinl-Reisinger informi.{598}ng\."', + 'timestamp': 1716886335, + 'upload_date': '20240528', + # 'modified_timestamp': 1712756663, + # 'modified_date': '20240410', + # '_old_archive_ids': ['orftvthek 14210000'], + }, + 'params': { + 'format': 'bestvideo', + }, + }] + + @classmethod + def _parse_metadata(cls, api_json): + return merge_dicts( + super(ORFONLiveIE, cls)._parse_metadata(api_json), + traverse_obj(api_json, { + 'timestamp': ('updated_at', T(parse_iso8601)), + 'release_timestamp': ('start', T(parse_iso8601)), + 'is_live': True, + })) From e39466051f01411944bd657fe826b658a0df5af1 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sat, 1 Jun 2024 13:29:26 +0100 Subject: [PATCH 24/48] [ORF] Support sound.orf.at, updating `ORFRadioIE` * maintain support for xx.orf.at/player/... URLs * add `ORFRadioCollectionIE` to support playlists in ORF Sound * back-port and re-work `ORFPodcastIE` from https://github.com/yt-dlp/yt-dlp/pull/8486, thx Esokrates --- youtube_dl/extractor/extractors.py | 15 +- youtube_dl/extractor/orf.py | 469 ++++++++++++++++------------- 2 files changed, 268 insertions(+), 216 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8bc5dd6d4..3da5f8020 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -900,20 +900,11 @@ from .ora import OraTVIE from .orf import ( ORFONIE, ORFONLiveIE, - ORFFM4IE, ORFFM4StoryIE, - ORFOE1IE, - ORFOE3IE, - ORFNOEIE, - ORFWIEIE, - ORFBGLIE, - ORFOOEIE, - ORFSTMIE, - ORFKTNIE, - ORFSBGIE, - ORFTIRIE, - ORFVBGIE, ORFIPTVIE, + ORFPodcastIE, + ORFRadioIE, + ORFRadioCollectionIE, ) from .outsidetv import OutsideTVIE from .packtpub import ( diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 3854911bd..25c16c84d 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -9,17 +9,19 @@ from .common import InfoExtractor from ..utils import ( clean_html, determine_ext, + ExtractorError, float_or_none, int_or_none, merge_dicts, + mimetype2ext, orderedSet, parse_age_limit, parse_iso8601, remove_end, - str_or_none, strip_jsonp, txt_or_none, unified_strdate, + update_url_query, url_or_none, ) from ..traversal import T, traverse_obj @@ -27,221 +29,280 @@ from ..traversal import T, traverse_obj k_float_or_none = functools.partial(float_or_none, scale=1000) -class ORFRadioIE(InfoExtractor): - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - show_date = mobj.group('date') - show_id = mobj.group('show') +class ORFRadioBase(InfoExtractor): + STATION_INFO = { + 'fm4': ('fm4', 'fm4', 'orffm4'), + 'noe': ('noe', 'oe2n', 'orfnoe'), + 'wien': ('wie', 'oe2w', 'orfwie'), + 'burgenland': ('bgl', 'oe2b', 
'orfbgl'), + 'ooe': ('ooe', 'oe2o', 'orfooe'), + 'steiermark': ('stm', 'oe2st', 'orfstm'), + 'kaernten': ('ktn', 'oe2k', 'orfktn'), + 'salzburg': ('sbg', 'oe2s', 'orfsbg'), + 'tirol': ('tir', 'oe2t', 'orftir'), + 'vorarlberg': ('vbg', 'oe2v', 'orfvbg'), + 'oe3': ('oe3', 'oe3', 'orfoe3'), + 'oe1': ('oe1', 'oe1', 'orfoe1'), + } + _ID_NAMES = ('id', 'guid', 'program') - data = self._download_json( - 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' - % (self._API_STATION, show_id, show_date), show_id) + @classmethod + def _get_item_id(cls, data): + return traverse_obj(data, *cls._ID_NAMES, expected_type=txt_or_none) - entries = [] - for info in data['streams']: - loop_stream_id = str_or_none(info.get('loopStreamId')) - if not loop_stream_id: - continue - title = str_or_none(data.get('title')) - if not title: - continue - start = int_or_none(info.get('start'), scale=1000) - end = int_or_none(info.get('end'), scale=1000) - duration = end - start if end and start else None - entries.append({ - 'id': loop_stream_id.replace('.mp3', ''), - 'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id), - 'title': title, - 'description': clean_html(data.get('subtitle')), - 'duration': duration, - 'timestamp': start, + @classmethod + def _get_api_payload(cls, data, expected_id, in_payload=False): + if expected_id not in traverse_obj(data, ('payload',)[:1 if in_payload else 0] + (cls._ID_NAMES, T(txt_or_none))): + raise ExtractorError('Unexpected API data result', video_id=expected_id) + return data['payload'] + + @staticmethod + def _extract_podcast_upload(data): + return traverse_obj(data, { + 'url': ('enclosures', 0, 'url'), + 'ext': ('enclosures', 0, 'type', T(mimetype2ext)), + 'filesize': ('enclosures', 0, 'length', T(int_or_none)), + 'title': ('title', T(txt_or_none)), + 'description': ('description', T(clean_html)), + 'timestamp': (('published', 'postDate'), T(parse_iso8601)), + 'duration': ('duration', T(k_float_or_none)), + 'series': ('podcast', 'title'), + 'uploader': ((('podcast', 'author'), 'station'), T(txt_or_none)), + 'uploader_id': ('podcast', 'channel', T(txt_or_none)), + }, get_all=False) + + @classmethod + def _entries(cls, data, station, item_type=None): + if item_type in ('upload', 'podcast-episode'): + yield merge_dicts({ + 'id': cls._get_item_id(data), 'ext': 'mp3', - 'series': data.get('programTitle'), - }) + 'vcodec': 'none', + }, cls._extract_podcast_upload(data), rev=True) + return - return { - '_type': 'playlist', - 'id': show_id, - 'title': data.get('title'), - 'description': clean_html(data.get('subtitle')), - 'entries': entries, - } + loop_station = cls.STATION_INFO[station][1] + for info in traverse_obj(data, ((('streams', Ellipsis), 'stream'), T(lambda v: v if v['loopStreamId'] else None))): + item_id = info['loopStreamId'] + host = info.get('host') or 'loopstream01.apa.at' + yield merge_dicts({ + 'id': item_id.replace('.mp3', ''), + 'ext': 'mp3', + 'url': update_url_query('https://{0}/'.format(host), { + 'channel': loop_station, + 'id': item_id, + }), + 'vcodec': 'none', + # '_old_archive_ids': [make_archive_id(old_ie, video_id)], + }, traverse_obj(data, { + 'title': ('title', T(txt_or_none)), + 'description': ('subtitle', T(clean_html)), + 'uploader': 'station', + 'series': ('programTitle', T(txt_or_none)), + }), traverse_obj(info, { + 'duration': (('duration', + (None, T(lambda x: x['end'] - x['start']))), + T(k_float_or_none), any), + 'timestamp': (('start', 'startISO'), T(parse_iso8601), any), + })) -class 
ORFFM4IE(ORFRadioIE): - IE_NAME = 'orf:fm4' - IE_DESC = 'radio FM4' - _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)' - _API_STATION = 'fm4' - _LOOP_STATION = 'fm4' +class ORFRadioIE(ORFRadioBase): + IE_NAME = 'orf:sound' + _STATION_RE = '|'.join(map(re.escape, ORFRadioBase.STATION_INFO.keys())) - _TEST = { - 'url': 'http://fm4.orf.at/player/20170107/4CC', - 'md5': '2b0be47375432a7ef104453432a19212', + _VALID_URL = ( + r'https?://sound\.orf\.at/radio/(?P<station>{0})/sendung/(?P<id>\d+)(?:/(?P<show>\w+))?'.format(_STATION_RE), + r'https?://(?P<station>{0})\.orf\.at/player/(?P<date>\d{{8}})/(?P<id>\d+)'.format(_STATION_RE), + ) + + _TESTS = [{ + 'url': 'https://sound.orf.at/radio/ooe/sendung/37802/guten-morgen-oberoesterreich-am-feiertag', 'info_dict': { - 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295', - 'ext': 'mp3', - 'title': 'Solid Steel Radioshow', - 'description': 'Die Mixshow von Coldcut und Ninja Tune.', - 'duration': 3599, - 'timestamp': 1483819257, - 'upload_date': '20170107', + 'id': '37802', + 'title': 'Guten Morgen Oberösterreich am Feiertag', + 'description': 'Oberösterreichs meistgehörte regionale Frühsendung.\nRegionale Nachrichten zu jeder halben Stunde.\nModeration: Wolfgang Lehner\nNachrichten: Stephan Schnabl', }, - 'skip': 'Shows from ORF radios are only available for 7 days.', - 'only_matching': True, - } - - -class ORFNOEIE(ORFRadioIE): - IE_NAME = 'orf:noe' - IE_DESC = 'Radio Niederösterreich' - _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'noe' - _LOOP_STATION = 'oe2n' - - _TEST = { - 'url': 'https://noe.orf.at/player/20200423/NGM', - 'only_matching': True, - } - - -class ORFWIEIE(ORFRadioIE): - IE_NAME = 'orf:wien' - IE_DESC = 'Radio Wien' - _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'wie' - _LOOP_STATION = 'oe2w' - - _TEST = { - 'url': 'https://wien.orf.at/player/20200423/WGUM', - 'only_matching': True, - } - - -class ORFBGLIE(ORFRadioIE): - IE_NAME = 'orf:burgenland' - IE_DESC = 'Radio Burgenland' - _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'bgl' - _LOOP_STATION = 'oe2b' - - _TEST = { - 'url': 'https://burgenland.orf.at/player/20200423/BGM', - 'only_matching': True, - } - - -class ORFOOEIE(ORFRadioIE): - IE_NAME = 'orf:oberoesterreich' - IE_DESC = 'Radio Oberösterreich' - _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'ooe' - _LOOP_STATION = 'oe2o' - - _TEST = { - 'url': 'https://ooe.orf.at/player/20200423/OGMO', - 'only_matching': True, - } - - -class ORFSTMIE(ORFRadioIE): - IE_NAME = 'orf:steiermark' - IE_DESC = 'Radio Steiermark' - _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'stm' - _LOOP_STATION = 'oe2st' - - _TEST = { - 'url': 'https://steiermark.orf.at/player/20200423/STGMS', - 'only_matching': True, - } - - -class ORFKTNIE(ORFRadioIE): - IE_NAME = 'orf:kaernten' - IE_DESC = 'Radio Kärnten' - _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'ktn' - _LOOP_STATION = 'oe2k' - - _TEST = { - 'url': 'https://kaernten.orf.at/player/20200423/KGUMO', - 'only_matching': True, - } - - -class ORFSBGIE(ORFRadioIE): - IE_NAME = 'orf:salzburg' - IE_DESC = 'Radio Salzburg' - _VALID_URL = 
r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'sbg' - _LOOP_STATION = 'oe2s' - - _TEST = { - 'url': 'https://salzburg.orf.at/player/20200423/SGUM', - 'only_matching': True, - } - - -class ORFTIRIE(ORFRadioIE): - IE_NAME = 'orf:tirol' - IE_DESC = 'Radio Tirol' - _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'tir' - _LOOP_STATION = 'oe2t' - - _TEST = { - 'url': 'https://tirol.orf.at/player/20200423/TGUMO', - 'only_matching': True, - } - - -class ORFVBGIE(ORFRadioIE): - IE_NAME = 'orf:vorarlberg' - IE_DESC = 'Radio Vorarlberg' - _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'vbg' - _LOOP_STATION = 'oe2v' - - _TEST = { - 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM', - 'only_matching': True, - } - - -class ORFOE3IE(ORFRadioIE): - IE_NAME = 'orf:oe3' - IE_DESC = 'Radio Österreich 3' - _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'oe3' - _LOOP_STATION = 'oe3' - - _TEST = { - 'url': 'https://oe3.orf.at/player/20200424/3WEK', - 'only_matching': True, - } - - -class ORFOE1IE(ORFRadioIE): - IE_NAME = 'orf:oe1' - IE_DESC = 'Radio Österreich 1' - _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' - _API_STATION = 'oe1' - _LOOP_STATION = 'oe1' - - _TEST = { - 'url': 'http://oe1.orf.at/player/20170108/456544', - 'md5': '34d8a6e67ea888293741c86a099b745b', + 'playlist': [{ + 'md5': 'f9ff8517dd681b642a2c900e2c9e6085', + 'info_dict': { + 'id': '2024-05-30_0559_tl_66_7DaysThu1_443862', + 'ext': 'mp3', + 'title': 'Guten Morgen Oberösterreich am Feiertag', + 'description': 'Oberösterreichs meistgehörte regionale Frühsendung.\nRegionale Nachrichten zu jeder halben Stunde.\nModeration: Wolfgang Lehner\nNachrichten: Stephan Schnabl', + 'timestamp': 1717041587, + 'upload_date': '20240530', + 'uploader': 'ooe', + 'duration': 14413.0, + } + }], + # 'skip': 'Shows from ORF Sound are only available for 30 days.' + }, { + 'url': 'https://oe1.orf.at/player/20240531/758136', + 'md5': '2397717aaf3ae9c22a4f090ee3b8d374', 'info_dict': { - 'id': '2017-01-08_0759_tl_51_7DaysSun6_256141', + 'id': '2024-05-31_1905_tl_51_7DaysFri35_2413387', 'ext': 'mp3', - 'title': 'Morgenjournal', - 'duration': 609, - 'timestamp': 1483858796, - 'upload_date': '20170108', + 'title': '"Who Cares?"', + 'description': 'Europas größte Netzkonferenz re:publica 2024', + 'timestamp': 1717175100, + 'upload_date': '20240531', + 'uploader': 'oe1', + 'duration': 1500, }, - 'skip': 'Shows from ORF radios are only available for 7 days.' - } + # 'skip': 'Shows from ORF Sound are only available for 30 days.' + }] + + def _real_extract(self, url): + m = self._match_valid_url(url) + station, show_id = m.group('station', 'id') + api_station, _, _ = self.STATION_INFO[station] + if 'date' in m.groupdict(): + data = self._download_json( + 'https://audioapi.orf.at/{0}/json/4.0/broadcast/{1}/{2}?_o={3}.orf.at'.format( + api_station, show_id, m.group('date'), station), show_id) + show_id = data['id'] + else: + data = self._download_json( + 'https://audioapi.orf.at/{0}/api/json/5.0/broadcast/{1}?_o=sound.orf.at'.format( + api_station, show_id), show_id) + + data = self._get_api_payload(data, show_id, in_payload=True) + + # site sends ISO8601 GMT date-times with separate TZ offset, ignored + # TODO: should `..._date` be calculated relative to TZ? 
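+        # e.g. in the first test above, local start 2024-05-30T05:59:47+02:00 yields timestamp 1717041587 (03:59:47 UTC) and upload_date '20240530'; a broadcast starting just after local midnight would get the previous day's UTC-derived date (illustrative values taken from the test data, not from a live API response)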
+ + return merge_dicts( + {'_type': 'multi_video'}, + self.playlist_result( + self._entries(data, station), show_id, + txt_or_none(data.get('title')), + clean_html(data.get('subtitle')))) + + +class ORFRadioCollectionIE(ORFRadioBase): + IE_NAME = 'orf:collection' + _VALID_URL = r'https?://sound\.orf\.at/collection/(?P<coll_id>\d+)(?:/(?P<item_id>\d+))?' + + _TESTS = [{ + 'url': 'https://sound.orf.at/collection/4/61908/was-das-uberschreiten-des-15-limits-bedeutet', + 'info_dict': { + 'id': '2577582', + }, + 'playlist': [{ + 'md5': '5789cec7d75575ff58d19c0428c80eb3', + 'info_dict': { + 'id': '2024-06-06_1659_tl_54_7DaysThu6_153926', + 'ext': 'mp3', + 'title': 'Klimakrise: Was das Überschreiten des 1,5°-Limits bedeutet', + 'timestamp': 1717686674, + 'upload_date': '20240606', + 'uploader': 'fm4', + }, + }], + # 'skip': 'Shows from ORF Sound are only available for 30 days.' + }, { + 'url': 'https://sound.orf.at/collection/4/', + 'info_dict': { + 'id': '4', + }, + 'playlist_mincount': 10, + 'playlist_maxcount': 13, + }] + + def _real_extract(self, url): + coll_id, item_id = self._match_valid_url(url).group('coll_id', 'item_id') + data = self._download_json( + 'https://collector.orf.at/api/frontend/collections/{0}?_o=sound.orf.at'.format( + coll_id), coll_id) + data = self._get_api_payload(data, coll_id, in_payload=True) + + def yield_items(): + for item in traverse_obj(data, ( + 'content', 'items', lambda _, v: any(k in v['target']['params'] for k in self._ID_NAMES))): + if item_id is None or item_id == txt_or_none(item.get('id')): + target = item['target'] + typed_item_id = self._get_item_id(target['params']) + station = target['params'].get('station') + item_type = target.get('type') + if typed_item_id and (station or item_type): + yield station, typed_item_id, item_type + if item_id is not None: + break + else: + if item_id is not None: + raise ExtractorError('Item not found in collection', + video_id=coll_id, expected=True) + + def item_playlist(station, typed_item_id, item_type): + if item_type == 'upload': + item_data = self._download_json('https://audioapi.orf.at/radiothek/api/2.0/upload/{0}?_o=sound.orf.at'.format( + typed_item_id), typed_item_id) + elif item_type == 'podcast-episode': + item_data = self._download_json('https://audioapi.orf.at/radiothek/api/2.0/episode/{0}?_o=sound.orf.at'.format( + typed_item_id), typed_item_id) + else: + api_station, _, _ = self.STATION_INFO[station] + item_data = self._download_json( + 'https://audioapi.orf.at/{0}/api/json/5.0/{1}/{2}?_o=sound.orf.at'.format( + api_station, item_type or 'broadcastitem', typed_item_id), typed_item_id) + + item_data = self._get_api_payload(item_data, typed_item_id, in_payload=True) + + return merge_dicts( + {'_type': 'multi_video'}, + self.playlist_result( + self._entries(item_data, station, item_type), typed_item_id, + txt_or_none(data.get('title')), + clean_html(data.get('subtitle')))) + + def yield_item_entries(): + for station, typed_id, item_type in yield_items(): + yield item_playlist(station, typed_id, item_type) + + if item_id is not None: + # coll_id = '/'.join((coll_id, item_id)) + return next(yield_item_entries()) + + return self.playlist_result(yield_item_entries(), coll_id, data.get('title')) + + +class ORFPodcastIE(ORFRadioBase): + IE_NAME = 'orf:podcast' + _STATION_RE = '|'.join(map(re.escape, (x[0] for x in ORFRadioBase.STATION_INFO.values()))) + '|tv' + _VALID_URL = r'https?://sound\.orf\.at/podcast/(?P<station>{0})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'.format(_STATION_RE) + _TESTS = [{ + 'url': 
'https://sound.orf.at/podcast/stm/der-kraeutertipp-von-christine-lackner/rotklee', + 'md5': '1f2bab2ba90c2ce0c2754196ea78b35f', + 'info_dict': { + 'id': 'der-kraeutertipp-von-christine-lackner/rotklee', + 'ext': 'mp3', + 'title': 'Rotklee', + 'description': 'In der Natur weit verbreitet - in der Medizin längst anerkennt: Rotklee. Dieser Podcast begleitet die Sendung "Radio Steiermark am Vormittag", Radio Steiermark, 28. Mai 2024.', + 'timestamp': 1716891761, + 'upload_date': '20240528', + 'uploader_id': 'stm_kraeutertipp', + 'uploader': 'ORF Radio Steiermark', + 'duration': 101, + 'series': 'Der Kräutertipp von Christine Lackner', + }, + # 'skip': 'ORF podcasts are only available for a limited time' + }] + + _ID_NAMES = ('slug', 'guid') + + def _real_extract(self, url): + station, show, show_id = self._match_valid_url(url).group('station', 'show', 'id') + data = self._download_json( + 'https://audioapi.orf.at/radiothek/api/2.0/podcast/{0}/{1}/{2}'.format( + station, show, show_id), show_id) + data = self._get_api_payload(data, show_id, in_payload=True) + + return merge_dicts({ + 'id': '/'.join((show, show_id)), + 'ext': 'mp3', + 'vcodec': 'none', + }, self._extract_podcast_upload(data), rev=True) class ORFIPTVIE(InfoExtractor): From e20ca543f037bd3a8e38507b870ed3a3de3c32e7 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sat, 1 Jun 2024 13:43:37 +0100 Subject: [PATCH 25/48] [ORF] Re-factor and update `ORFFM4StoryIE` * fix getting media via DASH instead of inaccessible mp4 * also get in-page YT media --- youtube_dl/extractor/orf.py | 253 ++++++++++++++++++------------------ 1 file changed, 126 insertions(+), 127 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 25c16c84d..f03aa40dc 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -6,6 +6,7 @@ import functools import re from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( clean_html, determine_ext, @@ -14,10 +15,8 @@ from ..utils import ( int_or_none, merge_dicts, mimetype2ext, - orderedSet, parse_age_limit, parse_iso8601, - remove_end, strip_jsonp, txt_or_none, unified_strdate, @@ -305,11 +304,90 @@ class ORFPodcastIE(ORFRadioBase): }, self._extract_podcast_upload(data), rev=True) -class ORFIPTVIE(InfoExtractor): +class ORFIPTVBase(InfoExtractor): + _TITLE_STRIP_RE = '' + + def _extract_video(self, video_id, webpage, fatal=False): + + data = self._download_json( + 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, + video_id)[0] + + video = traverse_obj(data, ( + 'sources', ('default', 'q8c'), + T(lambda x: x if x['loadBalancerUrl'] else None), + any)) + + load_balancer_url = video['loadBalancerUrl'] + + try: + rendition = self._download_json( + load_balancer_url, video_id, transform_source=strip_jsonp) + except ExtractorError: + rendition = None + + if not rendition: + rendition = { + 'redirect': { + 'smil': re.sub( + r'(/)jsonp(/.+\.)mp4$', r'\1dash\2smil/manifest.mpd', + load_balancer_url), + }, + } + + f = traverse_obj(video, { + 'abr': ('audioBitrate', T(int_or_none)), + 'vbr': ('bitrate', T(int_or_none)), + 'fps': ('videoFps', T(int_or_none)), + 'width': ('videoWidth', T(int_or_none)), + 'height': ('videoHeight', T(int_or_none)), + }) + + formats = [] + for format_id, format_url in traverse_obj(rendition, ( + 'redirect', T(dict.items), Ellipsis)): + if format_id == 'rtmp': + ff = f.copy() + ff.update({ + 'url': format_url, + 'format_id': format_id, + }) + formats.append(ff) + elif 
determine_ext(format_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id)) + elif determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, + entry_protocol='m3u8_native')) + elif determine_ext(format_url) == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id=format_id)) + + if formats or fatal: + self._sort_formats(formats) + else: + return + + return merge_dicts({ + 'id': video_id, + 'title': re.sub(self._TITLE_STRIP_RE, '', self._og_search_title(webpage)), + 'description': self._og_search_description(webpage), + 'upload_date': unified_strdate(self._html_search_meta( + 'dc.date', webpage, 'upload date', fatal=False)), + 'formats': formats, + }, traverse_obj(data, { + 'duration': ('duration', T(k_float_or_none)), + 'thumbnail': ('sources', 'default', 'preview', T(url_or_none)), + }), rev=True) + + +class ORFIPTVIE(ORFIPTVBase): IE_NAME = 'orf:iptv' IE_DESC = 'iptv.ORF.at' _WORKING = False # URLs redirect to orf.at/ _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' + _TITLE_STRIP_RE = r'\s+-\s+iptv\.ORF\.at\S*$' _TEST = { 'url': 'http://iptv.orf.at/stories/2275236/', @@ -334,74 +412,32 @@ class ORFIPTVIE(InfoExtractor): video_id = self._search_regex( r'data-video(?:id)?="(\d+)"', webpage, 'video id') - data = self._download_json( - 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, - video_id)[0] - - duration = float_or_none(data['duration'], 1000) - - video = data['sources']['default'] - load_balancer_url = video['loadBalancerUrl'] - abr = int_or_none(video.get('audioBitrate')) - vbr = int_or_none(video.get('bitrate')) - fps = int_or_none(video.get('videoFps')) - width = int_or_none(video.get('videoWidth')) - height = int_or_none(video.get('videoHeight')) - thumbnail = video.get('preview') - - rendition = self._download_json( - load_balancer_url, video_id, transform_source=strip_jsonp) - - f = { - 'abr': abr, - 'vbr': vbr, - 'fps': fps, - 'width': width, - 'height': height, - } - - formats = [] - for format_id, format_url in rendition['redirect'].items(): - if format_id == 'rtmp': - ff = f.copy() - ff.update({ - 'url': format_url, - 'format_id': format_id, - }) - formats.append(ff) - elif determine_ext(format_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_id)) - elif determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id)) - else: - continue - self._sort_formats(formats) - - title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at') - description = self._og_search_description(webpage) - upload_date = unified_strdate(self._html_search_meta( - 'dc.date', webpage, 'upload date')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': formats, - } + return self._extract_video(video_id, webpage) -class ORFFM4StoryIE(InfoExtractor): +class ORFFM4StoryIE(ORFIPTVBase): IE_NAME = 'orf:fm4:story' IE_DESC = 'fm4.orf.at stories' _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)' + _TITLE_STRIP_RE = r'\s+-\s+fm4\.ORF\.at\s*$' - _TEST = { + _TESTS = [{ + 'url': 'https://fm4.orf.at/stories/3041554/', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': '3041554', + 'title': 'Is The EU Green Deal In Mortal Danger?', + }, + 'playlist_count': 4, + 
'params': { + 'format': 'bestvideo', + }, + }, { 'url': 'http://fm4.orf.at/stories/2865738/', + 'info_dict': { + 'id': '2865738', + 'title': 'Manu Delago und Inner Tongue live', + }, 'playlist': [{ 'md5': 'e1c2c706c45c7b34cf478bbf409907ca', 'info_dict': { @@ -418,86 +454,49 @@ class ORFFM4StoryIE(InfoExtractor): 'info_dict': { 'id': '547798', 'ext': 'flv', - 'title': 'Manu Delago und Inner Tongue live (2)', + 'title': 'Manu Delago und Inner Tongue https://vod-ww.mdn.ors.at/cms-worldwide_episodes_nas/_definst_/nas/cms-worldwide_episodes/online/14228823_0005.smil/chunklist_b992000_vo.m3u8live (2)', 'duration': 1504.08, 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20170913', 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.', }, }], - } + 'skip': 'Videos gone', + }] def _real_extract(self, url): story_id = self._match_id(url) webpage = self._download_webpage(url, story_id) entries = [] - all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage)) - for idx, video_id in enumerate(all_ids): - data = self._download_json( - 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, - video_id)[0] + seen_ids = set() + for idx, video_id in enumerate(re.findall(r'data-video(?:id)?="(\d+)"', webpage)): + if video_id in seen_ids: + continue + seen_ids.add(video_id) + entry = self._extract_video(video_id, webpage, fatal=False) + if not entry: + continue - duration = float_or_none(data['duration'], 1000) - - video = data['sources']['q8c'] - load_balancer_url = video['loadBalancerUrl'] - abr = int_or_none(video.get('audioBitrate')) - vbr = int_or_none(video.get('bitrate')) - fps = int_or_none(video.get('videoFps')) - width = int_or_none(video.get('videoWidth')) - height = int_or_none(video.get('videoHeight')) - thumbnail = video.get('preview') - - rendition = self._download_json( - load_balancer_url, video_id, transform_source=strip_jsonp) - - f = { - 'abr': abr, - 'vbr': vbr, - 'fps': fps, - 'width': width, - 'height': height, - } - - formats = [] - for format_id, format_url in rendition['redirect'].items(): - if format_id == 'rtmp': - ff = f.copy() - ff.update({ - 'url': format_url, - 'format_id': format_id, - }) - formats.append(ff) - elif determine_ext(format_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_id)) - elif determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id)) - else: - continue - self._sort_formats(formats) - - title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at') if idx >= 1: # Titles are duplicates, make them unique - title += ' (' + str(idx + 1) + ')' - description = self._og_search_description(webpage) - upload_date = unified_strdate(self._html_search_meta( - 'dc.date', webpage, 'upload date')) + entry['title'] = '%s (%d)' % (entry['title'], idx) - entries.append({ - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': formats, - }) + entries.append(entry) - return self.playlist_result(entries) + seen_ids = set() + for yt_id in re.findall( + r'data-id\s*=\s*["\']([\w-]+)[^>]+\bclass\s*=\s*["\']youtube\b', + webpage): + if yt_id in seen_ids: + continue + seen_ids.add(yt_id) + if YoutubeIE.suitable(yt_id): + entries.append(self.url_result(yt_id, ie='Youtube', video_id=yt_id)) + + return 
self.playlist_result( + entries, story_id, + re.sub(self._TITLE_STRIP_RE, '', self._og_search_title(webpage, default='') or None)) class ORFONBase(InfoExtractor): From a48fe7491df1ef9c1d10689e852f0e8e5f48253c Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Jun 2024 12:32:07 +0100 Subject: [PATCH 26/48] [ORF] Skip tests with limited availability --- youtube_dl/extractor/orf.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index f03aa40dc..1ee78edbc 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -135,7 +135,7 @@ class ORFRadioIE(ORFRadioBase): 'duration': 14413.0, } }], - # 'skip': 'Shows from ORF Sound are only available for 30 days.' + 'skip': 'Shows from ORF Sound are only available for 30 days.' }, { 'url': 'https://oe1.orf.at/player/20240531/758136', 'md5': '2397717aaf3ae9c22a4f090ee3b8d374', @@ -149,7 +149,7 @@ class ORFRadioIE(ORFRadioBase): 'uploader': 'oe1', 'duration': 1500, }, - # 'skip': 'Shows from ORF Sound are only available for 30 days.' + 'skip': 'Shows from ORF Sound are only available for 30 days.' }] def _real_extract(self, url): @@ -199,8 +199,9 @@ class ORFRadioCollectionIE(ORFRadioBase): 'uploader': 'fm4', }, }], - # 'skip': 'Shows from ORF Sound are only available for 30 days.' + 'skip': 'Shows from ORF Sound are only available for 30 days.' }, { + # persistent playlist (FM4 Highlights) 'url': 'https://sound.orf.at/collection/4/', 'info_dict': { 'id': '4', @@ -285,7 +286,7 @@ class ORFPodcastIE(ORFRadioBase): 'duration': 101, 'series': 'Der Kräutertipp von Christine Lackner', }, - # 'skip': 'ORF podcasts are only available for a limited time' + 'skip': 'ORF podcasts are only available for a limited time' }] _ID_NAMES = ('slug', 'guid') @@ -642,6 +643,7 @@ class ORFONIE(ORFONBase): 'params': { 'format': 'bestvideo', }, + 'skip': 'Available until 2024-08-12', }, { 'url': 'https://on.orf.at/video/3220355', 'md5': '925a93b2b9a37da5c9b979d7cf71aa2e', @@ -683,6 +685,7 @@ class ORFONIE(ORFONBase): 'noplaylist': True, 'format': 'bestvideo', }, + 'skip': 'Available until 2024-06-14', }, { # Video with multiple segments and no combined version 'url': 'https://on.orf.at/video/14227864/formel-1-grosser-preis-von-monaco-2024', From 0153b387e57e0bb8e580f1869f85596d2767fb0d Mon Sep 17 00:00:00 2001 From: Paper <paper@paper.us.eu.org> Date: Tue, 11 Jun 2024 08:21:39 -0400 Subject: [PATCH 27/48] [VidLii] Add 720p support (#30924) * [VidLii] Add HD support (yt-dlp backport-ish) * Also fix a bug with the view count --------- Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/vidlii.py | 59 +++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index f4774256b..47f328e87 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor + from ..utils import ( float_or_none, get_element_by_id, @@ -11,6 +12,7 @@ from ..utils import ( strip_or_none, unified_strdate, urljoin, + str_to_int, ) @@ -35,6 +37,26 @@ class VidLiiIE(InfoExtractor): 'categories': ['News & Politics'], 'tags': ['Vidlii', 'Jan', 'Videogames'], } + }, { + # HD + 'url': 'https://www.vidlii.com/watch?v=2Ng8Abj2Fkl', + 'md5': '450e7da379c884788c3a4fa02a3ce1a4', + 'info_dict': { + 'id': '2Ng8Abj2Fkl', + 'ext': 'mp4', + 'title': 'test', + 
'description': 'md5:cc55a86032a7b6b3cbfd0f6b155b52e9', + 'thumbnail': 'https://www.vidlii.com/usfi/thmp/2Ng8Abj2Fkl.jpg', + 'uploader': 'VidLii', + 'uploader_url': 'https://www.vidlii.com/user/VidLii', + 'upload_date': '20200927', + 'duration': 5, + 'view_count': int, + 'comment_count': int, + 'average_rating': float, + 'categories': ['Film & Animation'], + 'tags': list, + }, }, { 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0', 'only_matching': True, @@ -46,11 +68,32 @@ class VidLiiIE(InfoExtractor): webpage = self._download_webpage( 'https://www.vidlii.com/watch?v=%s' % video_id, video_id) - video_url = self._search_regex( - r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage, - 'video url', group='url') + formats = [] - title = self._search_regex( + def add_format(format_url, height=None): + height = int(self._search_regex(r'(\d+)\.mp4', + format_url, 'height', default=360)) + + formats.append({ + 'url': format_url, + 'format_id': '%dp' % height if height else None, + 'height': height, + }) + + sources = re.findall( + r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', + webpage) + + formats = [] + if len(sources) > 1: + add_format(sources[1][1]) + self._check_formats(formats, video_id) + if len(sources) > 0: + add_format(sources[0][1]) + + self._sort_formats(formats) + + title = self._html_search_regex( (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage, 'title') @@ -82,9 +125,9 @@ class VidLiiIE(InfoExtractor): default=None) or self._search_regex( r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._search_regex( - (r'<strong>(\d+)</strong> views', - r'Views\s*:\s*<strong>(\d+)</strong>'), + view_count = str_to_int(self._html_search_regex( + (r'<strong>([\d,.]+)</strong> views', + r'Views\s*:\s*<strong>([\d,.]+)</strong>'), webpage, 'view count', fatal=False)) comment_count = int_or_none(self._search_regex( @@ -109,7 +152,7 @@ class VidLiiIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, From 2eac0fa3799b3d027148341186a52fb5a6288473 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 19 Jun 2024 19:04:47 +0100 Subject: [PATCH 28/48] [utils] Save `orig_msg` in `ExtractorError` --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cd4303566..113c913df 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2406,7 +2406,7 @@ class ExtractorError(YoutubeDLError): """ tb, if given, is the original traceback (so that it can be printed out). If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
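+        orig_msg retains the message exactly as passed in, before any video id prefix or other decoration is added below, so callers can re-use the undecorated text.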
""" - + self.orig_msg = msg if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): expected = True if video_id is not None: From ad01fa6ccadd1ecade8002e937492a141d3b8f25 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 19 Jun 2024 21:44:27 +0100 Subject: [PATCH 29/48] [jsinterp] Add Debugger from yt-dlp * https://github.com/yt-dlp/yt-dlp/commit/8f53dc4 * thx pukkandan --- test/test_jsinterp.py | 8 ++++--- test/test_youtube_signature.py | 4 ++++ youtube_dl/extractor/common.py | 1 - youtube_dl/jsinterp.py | 42 +++++++++++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 91b12f544..da8e98020 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -577,9 +577,11 @@ class TestJSInterpreter(unittest.TestCase): def test_unary_operators(self): jsi = JSInterpreter('function f(){return 2 - - - 2;}') self.assertEqual(jsi.call_function('f'), 0) - # fails - # jsi = JSInterpreter('function f(){return 2 + - + - - 2;}') - # self.assertEqual(jsi.call_function('f'), 0) + jsi = JSInterpreter('function f(){return 2 + - + - - 2;}') + self.assertEqual(jsi.call_function('f'), 0) + # https://github.com/ytdl-org/youtube-dl/issues/32815 + jsi = JSInterpreter('function f(){return 0 - 7 * - 6;}') + self.assertEqual(jsi.call_function('f'), 42) """ # fails so far def test_packed(self): diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f45dfec7c..cafba7a5c 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -158,6 +158,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', ), + ( + 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js', + '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg', + ), ] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b10e84416..9b0016d07 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -3033,7 +3033,6 @@ class InfoExtractor(object): transform_source=transform_source, default=None) def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): - # allow passing `transform_source` through to _find_jwplayer_data() transform_source = kwargs.pop('transform_source', None) kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {} diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 86d902248..e258ebd00 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -14,6 +14,7 @@ from .utils import ( remove_quotes, unified_timestamp, variadic, + write_string, ) from .compat import ( compat_basestring, @@ -220,6 +221,42 @@ class LocalNameSpace(ChainMap): return 'LocalNameSpace%s' % (self.maps, ) +class Debugger(object): + ENABLED = False + + @staticmethod + def write(*args, **kwargs): + level = kwargs.get('level', 100) + + def truncate_string(s, left, right=0): + if s is None or len(s) <= left + right: + return s + return '...'.join((s[:left - 3], s[-right:] if right else '')) + + write_string('[debug] JS: {0}{1}\n'.format( + ' ' * (100 - level), + ' '.join(truncate_string(compat_str(x), 50, 50) for x in args))) + + @classmethod + def wrap_interpreter(cls, f): + def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs): + if cls.ENABLED and stmt.strip(): + cls.write(stmt, level=allow_recursion) + try: + ret, should_ret = f(self, stmt, 
local_vars, allow_recursion, *args, **kwargs) + except Exception as e: + if cls.ENABLED: + if isinstance(e, ExtractorError): + e = e.orig_msg + cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion) + raise + if cls.ENABLED and stmt.strip(): + if should_ret or not repr(ret) == stmt: + cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion) + return ret, should_ret + return interpret_statement + + class JSInterpreter(object): __named_object_counter = 0 @@ -416,7 +453,7 @@ class JSInterpreter(object): except Exception as e: if allow_undefined: return JS_Undefined - raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e) + raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e) def _dump(self, obj, namespace): try: @@ -438,6 +475,7 @@ class JSInterpreter(object): _FINALLY_RE = re.compile(r'finally\s*\{') _SWITCH_RE = re.compile(r'switch\s*\(') + @Debugger.wrap_interpreter def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: raise self.Exception('Recursion limit reached') @@ -797,6 +835,8 @@ class JSInterpreter(object): def eval_method(): if (variable, member) == ('console', 'debug'): + if Debugger.ENABLED: + Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion)) return types = { 'String': compat_str, From fd8242e3efd3c0e2ba9a45c662d6983c00b21d6d Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Thu, 20 Jun 2024 02:36:54 +0100 Subject: [PATCH 30/48] [jsinterp] Fix and improve expression parsing * improve BODMAS (fixes https://github.com/ytdl-org/youtube-dl/issues/32815) * support more weird expressions with multiple unary ops --- youtube_dl/jsinterp.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index e258ebd00..12b71ed6a 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -798,18 +798,28 @@ class JSInterpreter(object): right_expr = separated.pop() # handle operators that are both unary and binary, minimal BODMAS if op in ('+', '-'): + # simplify/adjust consecutive instances of these operators undone = 0 while len(separated) > 1 and not separated[-1].strip(): undone += 1 separated.pop() if op == '-' and undone % 2 != 0: right_expr = op + right_expr + elif op == '+': + while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS: + right_expr = separated.pop() + right_expr + # hanging op at end of left => unary + (strip) or - (push right) left_val = separated[-1] for dm_op in ('*', '%', '/', '**'): bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) if len(bodmas) > 1 and not bodmas[-1].strip(): expr = op.join(separated) + op + right_expr - right_expr = None + if len(separated) > 1: + separated.pop() + right_expr = op.join((left_val, right_expr)) + else: + separated = [op.join((left_val, right_expr))] + right_expr = None break if right_expr is None: continue From e0094e63c3bce919841b6e3634c877fcef8772a1 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Thu, 20 Jun 2024 02:57:52 +0100 Subject: [PATCH 31/48] [jsinterp] Various tweaks * treat Infinity like NaN * cache operator list --- youtube_dl/jsinterp.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 12b71ed6a..02adf6678 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ 
-54,15 +54,16 @@ def wraps_op(op): # NB In principle NaN cannot be checked by membership. # Here all NaN values are actually this one, so _NaN is _NaN, -# although _NaN != _NaN. +# although _NaN != _NaN. Ditto Infinity. _NaN = float('nan') +_Infinity = float('inf') def _js_bit_op(op): def zeroise(x): - return 0 if x in (None, JS_Undefined, _NaN) else x + return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x @wraps_op(op) def wrapped(a, b): @@ -85,7 +86,7 @@ def _js_arith_op(op): def _js_div(a, b): if JS_Undefined in (a, b) or not (a or b): return _NaN - return operator.truediv(a or 0, b) if b else float('inf') + return operator.truediv(a or 0, b) if b else _Infinity def _js_mod(a, b): @@ -344,8 +345,7 @@ class JSInterpreter(object): def __op_chars(cls): op_chars = set(';,[') for op in cls._all_operators(): - for c in op[0]: - op_chars.add(c) + op_chars.update(op[0]) return op_chars def _named_object(self, namespace, obj): @@ -363,9 +363,8 @@ class JSInterpreter(object): # collections.Counter() is ~10% slower in both 2.7 and 3.9 counters = dict((k, 0) for k in _MATCHING_PARENS.values()) start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 - in_quote, escaping, skipping = None, False, 0 - after_op, in_regex_char_group = True, False - + in_quote, escaping, after_op, in_regex_char_group = None, False, True, False + skipping = 0 for idx, char in enumerate(expr): paren_delta = 0 if not in_quote: @@ -419,10 +418,12 @@ class JSInterpreter(object): return separated[0][1:].strip(), separated[1].strip() @staticmethod - def _all_operators(): - return itertools.chain( - # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence - _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS) + def _all_operators(_cached=[]): + if not _cached: + _cached.extend(itertools.chain( + # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence + _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)) + return _cached def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): if op in ('||', '&&'): @@ -549,7 +550,6 @@ class JSInterpreter(object): expr = self._dump(inner, local_vars) + outer if expr.startswith('('): - m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr) if m: # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig` @@ -731,7 +731,7 @@ class JSInterpreter(object): (?P<op>{_OPERATOR_RE})? 
=(?!=)(?P<expr>.*)$ )|(?P<return> - (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$ + (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$ )|(?P<indexing> (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$ )|(?P<attribute> @@ -765,11 +765,12 @@ class JSInterpreter(object): raise JS_Break() elif expr == 'continue': raise JS_Continue() - elif expr == 'undefined': return JS_Undefined, should_return elif expr == 'NaN': return _NaN, should_return + elif expr == 'Infinity': + return _Infinity, should_return elif md.get('return'): return local_vars[m.group('name')], should_return From 4d05f84325070c3f6fe2ed6096138757675469a4 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Thu, 20 Jun 2024 03:22:02 +0100 Subject: [PATCH 32/48] [PalcoMP3] Conform to new linter rule * no space after @ in decorator --- youtube_dl/extractor/palcomp3.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/palcomp3.py b/youtube_dl/extractor/palcomp3.py index fb29d83f9..60f7a4d48 100644 --- a/youtube_dl/extractor/palcomp3.py +++ b/youtube_dl/extractor/palcomp3.py @@ -8,7 +8,7 @@ from ..compat import compat_str from ..utils import ( int_or_none, str_or_none, - try_get, + traverse_obj, ) @@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE): } name''' - @ classmethod + @classmethod def suitable(cls, url): return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url) @@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE): artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist'] def entries(): - for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []): + for music in traverse_obj(artist, ( + 'musics', 'nodes', lambda _, m: m['musicID'])): yield self._parse_music(music) return self.playlist_result( @@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE): 'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande', 'description': 'md5:7043342c09a224598e93546e98e49282', 'upload_date': '20161107', - 'uploader_id': 'maiaramaraisaoficial', + 'uploader_id': '@maiaramaraisaoficial', 'uploader': 'Maiara e Maraisa', } }] From 3c466186a86a22c3fc050f038c38b76ffa1a2dad Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sun, 30 Jun 2024 12:34:19 +0100 Subject: [PATCH 33/48] [utils] Back-port Namespace and MEDIA_EXTENSIONS from yt-dlp Thx pukkandan * Namespace: https://github.com/yt-dlp/yt-dlp/commit/591bb9d355 * MEDIA_EXTENSIONS: https://github.com/yt-dlp/yt-dlp/commit/8dc5930511 --- youtube_dl/utils.py | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 113c913df..1af3e2b57 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1717,20 +1717,38 @@ TIMEZONE_NAMES = { 'PST': -8, 'PDT': -7 # Pacific } + +class Namespace(object): + """Immutable namespace""" + + def __init__(self, **kw_attr): + self.__dict__.update(kw_attr) + + def __iter__(self): + return iter(self.__dict__.values()) + + @property + def items_(self): + return self.__dict__.items() + + +MEDIA_EXTENSIONS = Namespace( + common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), + video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), + common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), + audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), + thumbnails=('jpg', 'png', 
'webp'), + # storyboards=('mhtml', ), + subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'), + manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'), +) +MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video +MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio + KNOWN_EXTENSIONS = ( - 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', - 'flv', 'f4v', 'f4a', 'f4b', - 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', - 'mkv', 'mka', 'mk3d', - 'avi', 'divx', - 'mov', - 'asf', 'wmv', 'wma', - '3gp', '3g2', - 'mp3', - 'flac', - 'ape', - 'wav', - 'f4f', 'f4m', 'm3u8', 'smil') + MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio + + MEDIA_EXTENSIONS.manifests +) # needed for sanitizing filenames in restricted mode ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', From 46521096433aceaa41b4caa845bed22ca6f377ce Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sun, 30 Jun 2024 18:37:25 +0100 Subject: [PATCH 34/48] [core,utils] Implement unsafe file extension mitigation * from https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j, thx grub4k --- test/test_utils.py | 46 ++++++++++ youtube_dl/YoutubeDL.py | 17 ++++ youtube_dl/utils.py | 189 +++++++++++++++++++++++++++++--------- 3 files changed, 209 insertions(+), 43 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index de7fe80b8..2947cce7e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -14,9 +14,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import io import itertools import json +import types import xml.etree.ElementTree from youtube_dl.utils import ( + _UnsafeExtensionError, age_restricted, args_to_str, base_url, @@ -270,6 +272,27 @@ class TestUtil(unittest.TestCase): expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')), '%s/expanded' % compat_getenv('HOME')) + _uncommon_extensions = [ + ('exe', 'abc.exe.ext'), + ('de', 'abc.de.ext'), + ('../.mp4', None), + ('..\\.mp4', None), + ] + + def assertUnsafeExtension(self, ext=None): + assert_raises = self.assertRaises(_UnsafeExtensionError) + assert_raises.ext = ext + orig_exit = assert_raises.__exit__ + + def my_exit(self_, exc_type, exc_val, exc_tb): + did_raise = orig_exit(exc_type, exc_val, exc_tb) + if did_raise and assert_raises.ext is not None: + self.assertEqual(assert_raises.ext, assert_raises.exception.extension, 'Unsafe extension not as expected') + return did_raise + + assert_raises.__exit__ = types.MethodType(my_exit, assert_raises) + return assert_raises + def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') @@ -278,6 +301,19 @@ class TestUtil(unittest.TestCase): self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') + # Test uncommon extensions + self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext') + for ext, result in self._uncommon_extensions: + with self.assertUnsafeExtension(ext): + prepend_extension('abc', ext) + if result: + self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result) + else: + with self.assertUnsafeExtension(ext): + prepend_extension('abc.ext', ext, 'ext') + with self.assertUnsafeExtension(ext): + prepend_extension('abc.unexpected_ext', ext, 'ext') + def test_replace_extension(self): self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
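+        # illustrative of the new semantics (see _change_extension in the utils.py hunk below): replace_extension('video.mp4', 'temp', 'mp4') -> 'video.temp', but with a non-matching hint replace_extension('video.mkv', 'temp', 'mp4') -> 'video.mkv.temp'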
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') @@ -286,6 +322,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + # Test uncommon extensions + self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video') + for ext, _ in self._uncommon_extensions: + with self.assertUnsafeExtension(ext): + replace_extension('abc', ext) + with self.assertUnsafeExtension(ext): + replace_extension('abc.ext', ext, 'ext') + with self.assertUnsafeExtension(ext): + replace_extension('abc.unexpected_ext', ext, 'ext') + def test_subtitles_filename(self): self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt') self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dad44435f..c19501915 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -7,6 +7,7 @@ import collections import copy import datetime import errno +import functools import io import itertools import json @@ -53,6 +54,7 @@ from .compat import ( compat_urllib_request_DataHandler, ) from .utils import ( + _UnsafeExtensionError, age_restricted, args_to_str, bug_reports_message, @@ -129,6 +131,20 @@ if compat_os_name == 'nt': import ctypes +def _catch_unsafe_file_extension(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + try: + return func(self, *args, **kwargs) + except _UnsafeExtensionError as error: + self.report_error( + '{0} found; to avoid damaging your system, this value is disallowed.' + ' If you believe this is an error{1}').format( + error.message, bug_reports_message(',')) + + return wrapper + + class YoutubeDL(object): """YoutubeDL class. 
@@ -1925,6 +1941,7 @@ class YoutubeDL(object): if self.params.get('forcejson', False): self.to_stdout(json.dumps(self.sanitize_info(info_dict))) + @_catch_unsafe_file_extension def process_info(self, info_dict): """Process a single resolved IE result.""" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1af3e2b57..df203b97a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1717,39 +1717,6 @@ TIMEZONE_NAMES = { 'PST': -8, 'PDT': -7 # Pacific } - -class Namespace(object): - """Immutable namespace""" - - def __init__(self, **kw_attr): - self.__dict__.update(kw_attr) - - def __iter__(self): - return iter(self.__dict__.values()) - - @property - def items_(self): - return self.__dict__.items() - - -MEDIA_EXTENSIONS = Namespace( - common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), - video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), - common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), - audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), - thumbnails=('jpg', 'png', 'webp'), - # storyboards=('mhtml', ), - subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'), - manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'), -) -MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video -MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio - -KNOWN_EXTENSIONS = ( - MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio - + MEDIA_EXTENSIONS.manifests -) - # needed for sanitizing filenames in restricted mode ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'], @@ -3977,19 +3944,22 @@ def parse_duration(s): return duration -def prepend_extension(filename, ext, expected_real_ext=None): +def _change_extension(prepend, filename, ext, expected_real_ext=None): name, real_ext = os.path.splitext(filename) - return ( - '{0}.{1}{2}'.format(name, ext, real_ext) - if not expected_real_ext or real_ext[1:] == expected_real_ext - else '{0}.{1}'.format(filename, ext)) + sanitize_extension = _UnsafeExtensionError.sanitize_extension + + if not expected_real_ext or real_ext.partition('.')[0::2] == ('', expected_real_ext): + filename = name + if prepend and real_ext: + sanitize_extension(ext, prepend=prepend) + return ''.join((filename, '.', ext, real_ext)) + + # Mitigate path traversal and file impersonation attacks + return '.'.join((filename, sanitize_extension(ext))) -def replace_extension(filename, ext, expected_real_ext=None): - name, real_ext = os.path.splitext(filename) - return '{0}.{1}'.format( - name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename, - ext) +prepend_extension = functools.partial(_change_extension, True) +replace_extension = functools.partial(_change_extension, False) def check_executable(exe, args=[]): @@ -6579,3 +6549,136 @@ def join_nonempty(*values, **kwargs): if from_dict is not None: values = (traverse_obj(from_dict, variadic(v)) for v in values) return delim.join(map(compat_str, filter(None, values))) + + +class Namespace(object): + """Immutable namespace""" + + def __init__(self, **kw_attr): + self.__dict__.update(kw_attr) + + def __iter__(self): + return iter(self.__dict__.values()) + + @property + def items_(self): + return self.__dict__.items() + + +MEDIA_EXTENSIONS = Namespace( + common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), + video=('3g2', '3gp', 'f4v', 
'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), + common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), + audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), + thumbnails=('jpg', 'png', 'webp'), + # storyboards=('mhtml', ), + subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'), + manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'), +) +MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video +MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio + +KNOWN_EXTENSIONS = ( + MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio + + MEDIA_EXTENSIONS.manifests +) + + +class _UnsafeExtensionError(Exception): + """ + Mitigation exception for unwanted file overwrite/path traversal + This should be caught in YoutubeDL.py with a warning + + Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j + """ + _ALLOWED_EXTENSIONS = frozenset(itertools.chain( + ( # internal + 'description', + 'json', + 'meta', + 'orig', + 'part', + 'temp', + 'uncut', + 'unknown_video', + 'ytdl', + ), + # video + MEDIA_EXTENSIONS.video, ( + 'avif', + 'ismv', + 'm2ts', + 'm4s', + 'mng', + 'mpeg', + 'qt', + 'swf', + 'ts', + 'vp9', + 'wvm', + ), + # audio + MEDIA_EXTENSIONS.audio, ( + 'isma', + 'mid', + 'mpga', + 'ra', + ), + # image + MEDIA_EXTENSIONS.thumbnails, ( + 'bmp', + 'gif', + 'ico', + 'heic', + 'jng', + 'jpeg', + 'jxl', + 'svg', + 'tif', + 'wbmp', + ), + # subtitle + MEDIA_EXTENSIONS.subtitles, ( + 'dfxp', + 'fs', + 'ismt', + 'sami', + 'scc', + 'ssa', + 'tt', + ), + # others + MEDIA_EXTENSIONS.manifests, + ( + # not used in yt-dl + # *MEDIA_EXTENSIONS.storyboards, + # 'desktop', + # 'ism', + # 'm3u', + # 'sbv', + # 'swp', + # 'url', + # 'webloc', + # 'xml', + ))) + + def __init__(self, extension): + super(_UnsafeExtensionError, self).__init__('unsafe file extension: {0!r}'.format(extension)) + self.extension = extension + + @classmethod + def sanitize_extension(cls, extension, **kwargs): + # ... 
/, *, prepend=False + prepend = kwargs.get('prepend', False) + + if '/' in extension or '\\' in extension: + raise cls(extension) + + if not prepend: + last = extension.rpartition('.')[-1] + if last == 'bin': + extension = last = 'unknown_video' + if last.lower() not in cls._ALLOWED_EXTENSIONS: + raise cls(extension) + + return extension From 37cea84f775129ad715b9bcd617251c831fcc980 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 2 Jul 2024 14:54:25 +0100 Subject: [PATCH 35/48] [core,utils] Support unpublicised `--no-check-extensions` --- youtube_dl/__init__.py | 4 ++++ youtube_dl/options.py | 4 ++++ youtube_dl/utils.py | 6 ++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cc8285eba..06bdfb689 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -21,6 +21,7 @@ from .compat import ( workaround_optparse_bug9161, ) from .utils import ( + _UnsafeExtensionError, DateRange, decodeOption, DEFAULT_OUTTMPL, @@ -173,6 +174,9 @@ def _real_main(argv=None): if opts.ap_mso and opts.ap_mso not in MSO_INFO: parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') + if opts.no_check_extensions: + _UnsafeExtensionError.lenient = True + def parse_retries(retries): if retries in ('inf', 'infinite'): parsed_retries = float('inf') diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 434f520d3..61705d1f0 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -533,6 +533,10 @@ def parseOpts(overrideArguments=None): '--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation') + workarounds.add_option( + '--no-check-extensions', + action='store_true', dest='no_check_extensions', default=False, + help='Suppress file extension validation') workarounds.add_option( '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index df203b97a..3ec9d3811 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6587,7 +6587,6 @@ KNOWN_EXTENSIONS = ( class _UnsafeExtensionError(Exception): """ Mitigation exception for unwanted file overwrite/path traversal - This should be caught in YoutubeDL.py with a warning Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j """ @@ -6666,6 +6665,9 @@ class _UnsafeExtensionError(Exception): super(_UnsafeExtensionError, self).__init__('unsafe file extension: {0!r}'.format(extension)) self.extension = extension + # support --no-check-extensions + lenient = False + @classmethod def sanitize_extension(cls, extension, **kwargs): # ... 
/, *, prepend=False @@ -6678,7 +6680,7 @@ class _UnsafeExtensionError(Exception): last = extension.rpartition('.')[-1] if last == 'bin': extension = last = 'unknown_video' - if last.lower() not in cls._ALLOWED_EXTENSIONS: + if not (cls.lenient or last.lower() in cls._ALLOWED_EXTENSIONS): raise cls(extension) return extension From f4b47754d9716dbe61372d75e85e965328335ec9 Mon Sep 17 00:00:00 2001 From: Sergey Musatov <akapulse@yandex.ru> Date: Sat, 6 Jul 2024 13:04:36 +0300 Subject: [PATCH 36/48] [YandexMusic] Download music in High Quality (320 Kbit/s) PR #31159 --- youtube_dl/extractor/yandexmusic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 84969f8e1..55d4fb5a0 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -120,6 +120,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE): download_data = self._download_json( 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id), track_id, 'Downloading track location url JSON', + query={'hq': 1}, headers={'X-Retpath-Y': url}) fd_data = self._download_json( From 36801c62df733cfa4f74b607532e012900d8c77f Mon Sep 17 00:00:00 2001 From: unkernet <unkernet@gmail.com> Date: Sun, 7 Jul 2024 21:18:33 +0200 Subject: [PATCH 37/48] [YandexMusic] Save track version in the title field PR #32837 * Add track version to track title --- youtube_dl/extractor/yandexmusic.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 55d4fb5a0..8da5b430f 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -106,6 +106,25 @@ class YandexMusicTrackIE(YandexMusicBaseIE): }, { 'url': 'http://music.yandex.com/album/540508/track/4878838', 'only_matching': True, + }, { + 'url': 'https://music.yandex.ru/album/16302456/track/85430762', + 'md5': '11b8d50ab03b57738deeaadf661a0a48', + 'info_dict': { + 'id': '85430762', + 'ext': 'mp3', + 'abr': 128, + 'title': 'Haddadi Von Engst, Phonic Youth, Super Flu - Til The End (Super Flu Remix)', + 'filesize': int, + 'duration': 431.14, + 'track': 'Til The End (Super Flu Remix)', + 'album': 'Til The End', + 'album_artist': 'Haddadi Von Engst, Phonic Youth', + 'artist': 'Haddadi Von Engst, Phonic Youth, Super Flu', + 'release_year': 2021, + 'genre': 'house', + 'disc_number': 1, + 'track_number': 2, + } }] def _real_extract(self, url): @@ -116,6 +135,9 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'track', tld, url, track_id, 'Downloading track JSON', {'track': '%s:%s' % (track_id, album_id)})['track'] track_title = track['title'] + track_version = track.get('version') + if track_version: + track_title = '%s (%s)' % (track_title, track_version) download_data = self._download_json( 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id), From a452f9437c8a3048f75fc12f75bcfd3eed78430f Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sun, 7 Jul 2024 20:43:10 +0100 Subject: [PATCH 38/48] [core] Fix PR #32830 for fixed extensionless output template --- youtube_dl/YoutubeDL.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c19501915..9e5620eef 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -139,8 +139,8 @@ def 
_catch_unsafe_file_extension(func): except _UnsafeExtensionError as error: self.report_error( '{0} found; to avoid damaging your system, this value is disallowed.' - ' If you believe this is an error{1}').format( - error.message, bug_reports_message(',')) + ' If you believe this is an error{1}'.format( + error_to_compat_str(error), bug_reports_message(','))) return wrapper @@ -2114,18 +2114,26 @@ class YoutubeDL(object): # TODO: Check acodec/vcodec return False - filename_real_ext = os.path.splitext(filename)[1][1:] - filename_wo_ext = ( - os.path.splitext(filename)[0] - if filename_real_ext == info_dict['ext'] - else filename) + exts = [info_dict['ext']] requested_formats = info_dict['requested_formats'] if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): info_dict['ext'] = 'mkv' self.report_warning( 'Requested formats are incompatible for merge and will be merged into mkv.') + exts.append(info_dict['ext']) + # Ensure filename always has a correct extension for successful merge - filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) + def correct_ext(filename, ext=exts[1]): + if filename == '-': + return filename + f_name, f_real_ext = os.path.splitext(filename) + f_real_ext = f_real_ext[1:] + filename_wo_ext = f_name if f_real_ext in exts else filename + if ext is None: + ext = f_real_ext or None + return join_nonempty(filename_wo_ext, ext, delim='.') + + filename = correct_ext(filename) if os.path.exists(encodeFilename(filename)): self.to_screen( '[download] %s has already been downloaded and ' @@ -2135,8 +2143,9 @@ class YoutubeDL(object): new_info = dict(info_dict) new_info.update(f) fname = prepend_extension( - self.prepare_filename(new_info), - 'f%s' % f['format_id'], new_info['ext']) + correct_ext( + self.prepare_filename(new_info), new_info['ext']), + 'f%s' % (f['format_id'],), new_info['ext']) if not ensure_dir_exists(fname): return downloaded.append(fname) From 43a74c5fa5e43ee563b1488634585cb0bf02ea24 Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@protonmail.com> Date: Fri, 5 Jul 2024 13:41:30 -0500 Subject: [PATCH 39/48] [core] Address gaps in allowed extensions Adds some extensions missing in 46521096433aceaa41b4caa845bed22ca6f377ce (from yt-dlp/yt-dlp#10362) Authored by: bashonly Co-authored by: dirkf --- youtube_dl/utils.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3ec9d3811..ac1e78002 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6604,27 +6604,53 @@ class _UnsafeExtensionError(Exception): ), # video MEDIA_EXTENSIONS.video, ( - 'avif', + 'asx', 'ismv', + 'm2t', 'm2ts', + 'm2v', 'm4s', 'mng', + 'mp2v', + 'mp4v', + 'mpe', 'mpeg', + 'mpeg1', + 'mpeg2', + 'mpeg4', + 'mxf', + 'ogm', 'qt', + 'rm', 'swf', 'ts', + 'vob', 'vp9', - 'wvm', ), # audio MEDIA_EXTENSIONS.audio, ( + '3ga', + 'ac3', + 'adts', + 'aif', + 'au', + 'dts', 'isma', + 'it', 'mid', + 'mod', 'mpga', + 'mp1', + 'mp2', + 'mp4a', + 'mpa', 'ra', + 'shn', + 'xm', ), # image MEDIA_EXTENSIONS.thumbnails, ( + 'avif', 'bmp', 'gif', 'ico', @@ -6634,6 +6660,7 @@ class _UnsafeExtensionError(Exception): 'jxl', 'svg', 'tif', + 'tiff', 'wbmp', ), # subtitle @@ -6641,10 +6668,15 @@ class _UnsafeExtensionError(Exception): 'dfxp', 'fs', 'ismt', + 'json3', 'sami', 'scc', + 'srv1', + 'srv2', + 'srv3', 'ssa', 'tt', + 'xml', ), # others MEDIA_EXTENSIONS.manifests, @@ -6658,7 +6690,6 @@ class _UnsafeExtensionError(Exception): # 'swp', # 'url', # 'webloc', - # 
'xml', ))) def __init__(self, extension): From 756f6b45c7c9a77f0fa912f882afe7e04878a01b Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 10 Jul 2024 17:58:38 +0100 Subject: [PATCH 40/48] [jsinterp] Re-align JSInterp and tests (esp.) with yt-dlp Thx: various yt-dlp authors --- test/test_jsinterp.py | 675 +++++++++++++++-------------------------- youtube_dl/jsinterp.py | 53 ++-- 2 files changed, 268 insertions(+), 460 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index da8e98020..86d7f0d46 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -11,194 +11,146 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import math import re +from youtube_dl.compat import compat_str from youtube_dl.jsinterp import JS_Undefined, JSInterpreter +NaN = object() + class TestJSInterpreter(unittest.TestCase): + def _test(self, jsi_or_code, expected, func='f', args=()): + if isinstance(jsi_or_code, compat_str): + jsi_or_code = JSInterpreter(jsi_or_code) + got = jsi_or_code.call_function(func, *args) + if expected is NaN: + self.assertTrue(math.isnan(got), '{0} is not NaN'.format(got)) + else: + self.assertEqual(got, expected) + def test_basic(self): - jsi = JSInterpreter('function x(){;}') - self.assertEqual(jsi.call_function('x'), None) - self.assertEqual(repr(jsi.extract_function('x')), 'F<x>') + jsi = JSInterpreter('function f(){;}') + self.assertEqual(repr(jsi.extract_function('f')), 'F<f>') + self._test(jsi, None) - jsi = JSInterpreter('function x3(){return 42;}') - self.assertEqual(jsi.call_function('x3'), 42) - - jsi = JSInterpreter('function x3(){42}') - self.assertEqual(jsi.call_function('x3'), None) - - jsi = JSInterpreter('var x5 = function(){return 42;}') - self.assertEqual(jsi.call_function('x5'), 42) - - def test_calc(self): - jsi = JSInterpreter('function x4(a){return 2*a+1;}') - self.assertEqual(jsi.call_function('x4', 3), 7) + self._test('function f(){return 42;}', 42) + self._test('function f(){42}', None) + self._test('var f = function(){return 42;}', 42) def test_add(self): - jsi = JSInterpreter('function f(){return 42 + 7;}') - self.assertEqual(jsi.call_function('f'), 49) - jsi = JSInterpreter('function f(){return 42 + undefined;}') - self.assertTrue(math.isnan(jsi.call_function('f'))) - jsi = JSInterpreter('function f(){return 42 + null;}') - self.assertEqual(jsi.call_function('f'), 42) + self._test('function f(){return 42 + 7;}', 49) + self._test('function f(){return 42 + undefined;}', NaN) + self._test('function f(){return 42 + null;}', 42) def test_sub(self): - jsi = JSInterpreter('function f(){return 42 - 7;}') - self.assertEqual(jsi.call_function('f'), 35) - jsi = JSInterpreter('function f(){return 42 - undefined;}') - self.assertTrue(math.isnan(jsi.call_function('f'))) - jsi = JSInterpreter('function f(){return 42 - null;}') - self.assertEqual(jsi.call_function('f'), 42) + self._test('function f(){return 42 - 7;}', 35) + self._test('function f(){return 42 - undefined;}', NaN) + self._test('function f(){return 42 - null;}', 42) def test_mul(self): - jsi = JSInterpreter('function f(){return 42 * 7;}') - self.assertEqual(jsi.call_function('f'), 294) - jsi = JSInterpreter('function f(){return 42 * undefined;}') - self.assertTrue(math.isnan(jsi.call_function('f'))) - jsi = JSInterpreter('function f(){return 42 * null;}') - self.assertEqual(jsi.call_function('f'), 0) + self._test('function f(){return 42 * 7;}', 294) + self._test('function f(){return 42 * undefined;}', NaN) + self._test('function 
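# Illustration, not part of the patch: with patch 39's wider allow-list, the
# extension gate shown at the top of this section reduces to the shape
# below; `lenient` is the opt-out switch, and anything else unknown still
# raises. ALLOWED is a tiny stand-in for the real list, and ValueError
# stands in for _UnsafeExtensionError:
ALLOWED = {'mp4', 'mkv', 'mp3', 'm2ts', 'json3', 'srv1', 'tiff'}

def check_extension(extension, lenient=False):
    last = extension.rpartition('.')[-1]
    if not (lenient or last.lower() in ALLOWED):
        raise ValueError('unsafe extension: %r' % extension)
    return extension

print(check_extension('info.json3'))         # newly allowed by this patch
print(check_extension('exe', lenient=True))  # lenient skips the check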
f(){return 42 * null;}', 0) def test_div(self): jsi = JSInterpreter('function f(a, b){return a / b;}') - self.assertTrue(math.isnan(jsi.call_function('f', 0, 0))) - self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1))) - self.assertTrue(math.isinf(jsi.call_function('f', 2, 0))) - self.assertEqual(jsi.call_function('f', 0, 3), 0) + self._test(jsi, NaN, args=(0, 0)) + self._test(jsi, NaN, args=(JS_Undefined, 1)) + self._test(jsi, float('inf'), args=(2, 0)) + self._test(jsi, 0, args=(0, 3)) def test_mod(self): - jsi = JSInterpreter('function f(){return 42 % 7;}') - self.assertEqual(jsi.call_function('f'), 0) - jsi = JSInterpreter('function f(){return 42 % 0;}') - self.assertTrue(math.isnan(jsi.call_function('f'))) - jsi = JSInterpreter('function f(){return 42 % undefined;}') - self.assertTrue(math.isnan(jsi.call_function('f'))) + self._test('function f(){return 42 % 7;}', 0) + self._test('function f(){return 42 % 0;}', NaN) + self._test('function f(){return 42 % undefined;}', NaN) def test_exp(self): - jsi = JSInterpreter('function f(){return 42 ** 2;}') - self.assertEqual(jsi.call_function('f'), 1764) - jsi = JSInterpreter('function f(){return 42 ** undefined;}') - self.assertTrue(math.isnan(jsi.call_function('f'))) - jsi = JSInterpreter('function f(){return 42 ** null;}') - self.assertEqual(jsi.call_function('f'), 1) - jsi = JSInterpreter('function f(){return undefined ** 42;}') - self.assertTrue(math.isnan(jsi.call_function('f'))) + self._test('function f(){return 42 ** 2;}', 1764) + self._test('function f(){return 42 ** undefined;}', NaN) + self._test('function f(){return 42 ** null;}', 1) + self._test('function f(){return undefined ** 42;}', NaN) + + def test_calc(self): + self._test('function f(a){return 2*a+1;}', 7, args=[3]) def test_empty_return(self): - jsi = JSInterpreter('function f(){return; y()}') - self.assertEqual(jsi.call_function('f'), None) + self._test('function f(){return; y()}', None) def test_morespace(self): - jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }') - self.assertEqual(jsi.call_function('x', 3), 7) - - jsi = JSInterpreter('function f () { x = 2 ; return x; }') - self.assertEqual(jsi.call_function('f'), 2) + self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3]) + self._test('function f () { x = 2 ; return x; }', 2) def test_strange_chars(self): - jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }') - self.assertEqual(jsi.call_function('$_xY1', 20), 21) + self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }', + 21, args=[20], func='$_xY1') def test_operators(self): - jsi = JSInterpreter('function f(){return 1 << 5;}') - self.assertEqual(jsi.call_function('f'), 32) - - jsi = JSInterpreter('function f(){return 2 ** 5}') - self.assertEqual(jsi.call_function('f'), 32) - - jsi = JSInterpreter('function f(){return 19 & 21;}') - self.assertEqual(jsi.call_function('f'), 17) - - jsi = JSInterpreter('function f(){return 11 >> 2;}') - self.assertEqual(jsi.call_function('f'), 2) - - jsi = JSInterpreter('function f(){return []? 2+3: 4;}') - self.assertEqual(jsi.call_function('f'), 5) - - jsi = JSInterpreter('function f(){return 1 == 2}') - self.assertEqual(jsi.call_function('f'), False) - - jsi = JSInterpreter('function f(){return 0 && 1 || 2;}') - self.assertEqual(jsi.call_function('f'), 2) - - jsi = JSInterpreter('function f(){return 0 ?? 
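# Illustration, not part of the patch: the consolidated tests above pin the
# JS coercion rules the interpreter must honour: `undefined` poisons
# arithmetic to NaN, while `null` coerces to 0. A quick check, assuming
# youtube_dl is importable from this checkout:
import math
from youtube_dl.jsinterp import JSInterpreter

print(JSInterpreter('function f(){return 42 + null;}').call_function('f'))  # 42
print(JSInterpreter('function f(){return 42 * null;}').call_function('f'))  # 0
print(math.isnan(
    JSInterpreter('function f(){return 42 - undefined;}').call_function('f')))  # True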
42;}') - self.assertEqual(jsi.call_function('f'), 0) - - jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}') - self.assertFalse(jsi.call_function('f')) + self._test('function f(){return 1 << 5;}', 32) + self._test('function f(){return 2 ** 5}', 32) + self._test('function f(){return 19 & 21;}', 17) + self._test('function f(){return 11 >> 2;}', 2) + self._test('function f(){return []? 2+3: 4;}', 5) + self._test('function f(){return 1 == 2}', False) + self._test('function f(){return 0 && 1 || 2;}', 2) + self._test('function f(){return 0 ?? 42;}', 0) + self._test('function f(){return "life, the universe and everything" < 42;}', False) + # https://github.com/ytdl-org/youtube-dl/issues/32815 + self._test('function f(){return 0 - 7 * - 6;}', 42) def test_array_access(self): - jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') - self.assertEqual(jsi.call_function('f'), [5, 2, 7]) + self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7]) def test_parens(self): - jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}') - self.assertEqual(jsi.call_function('f'), 7) - - jsi = JSInterpreter('function f(){return (1 + 2) * 3;}') - self.assertEqual(jsi.call_function('f'), 9) + self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7) + self._test('function f(){return (1 + 2) * 3;}', 9) def test_quotes(self): - jsi = JSInterpreter(r'function f(){return "a\"\\("}') - self.assertEqual(jsi.call_function('f'), r'a"\(') + self._test(r'function f(){return "a\"\\("}', r'a"\(') def test_assignments(self): - jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}') - self.assertEqual(jsi.call_function('f'), 31) - - jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}') - self.assertEqual(jsi.call_function('f'), 51) - - jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}') - self.assertEqual(jsi.call_function('f'), -11) + self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31) + self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51) + self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11) + @unittest.skip('Not yet fully implemented') def test_comments(self): - 'Skipping: Not yet fully implemented' - return - jsi = JSInterpreter(''' - function x() { - var x = /* 1 + */ 2; - var y = /* 30 - * 40 */ 50; - return x + y; - } - ''') - self.assertEqual(jsi.call_function('x'), 52) + self._test(''' + function f() { + var x = /* 1 + */ 2; + var y = /* 30 + * 40 */ 50; + return x + y; + } + ''', 52) - jsi = JSInterpreter(''' - function f() { - var x = "/*"; - var y = 1 /* comment */ + 2; - return y; - } - ''') - self.assertEqual(jsi.call_function('f'), 3) + self._test(''' + function f() { + var x = "/*"; + var y = 1 /* comment */ + 2; + return y; + } + ''', 3) def test_precedence(self): - jsi = JSInterpreter(''' - function x() { - var a = [10, 20, 30, 40, 50]; - var b = 6; - a[0]=a[b%a.length]; - return a; - }''') - self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) + self._test(''' + function f() { + var a = [10, 20, 30, 40, 50]; + var b = 6; + a[0]=a[b%a.length]; + return a; + } + ''', [20, 20, 30, 40, 50]) def test_builtins(self): - jsi = JSInterpreter(''' - function x() { return NaN } - ''') - self.assertTrue(math.isnan(jsi.call_function('x'))) + self._test('function f() { return NaN }', NaN) def test_Date(self): - jsi = JSInterpreter(''' - function x(dt) { return 
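# Illustration, not part of the patch: the new test_operators case from
# issue 32815 exercises sign chains: in `0 - 7 * - 6` the `- 6` is a unary
# minus on the multiplicand, so the result is 0 - (-42) = 42. Assuming
# youtube_dl is importable:
from youtube_dl.jsinterp import JSInterpreter

print(JSInterpreter('function f(){return 0 - 7 * - 6;}').call_function('f'))  # 42
print(JSInterpreter('function f(){return 0 ?? 42;}').call_function('f'))      # 0: ?? only falls back on null/undefined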
new Date(dt) - 0; } - ''') - self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000) + self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000) + jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }') # date format m/d/y - self.assertEqual(jsi.call_function('x', '12/31/1969 18:01:26 MDT'), 86000) - + self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT']) # epoch 0 - self.assertEqual(jsi.call_function('x', '1 January 1970 00:00:00 UTC'), 0) + self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC']) def test_call(self): jsi = JSInterpreter(''' @@ -206,179 +158,115 @@ class TestJSInterpreter(unittest.TestCase): function y(a) { return x() + (a?a:0); } function z() { return y(3); } ''') - self.assertEqual(jsi.call_function('z'), 5) - self.assertEqual(jsi.call_function('y'), 2) + self._test(jsi, 5, func='z') + self._test(jsi, 2, func='y') def test_if(self): - jsi = JSInterpreter(''' - function x() { + self._test(''' + function f() { let a = 9; if (0==0) {a++} return a - }''') - self.assertEqual(jsi.call_function('x'), 10) + } + ''', 10) - jsi = JSInterpreter(''' - function x() { + self._test(''' + function f() { if (0==0) {return 10} - }''') - self.assertEqual(jsi.call_function('x'), 10) + } + ''', 10) - jsi = JSInterpreter(''' - function x() { + self._test(''' + function f() { if (0!=0) {return 1} else {return 10} - }''') - self.assertEqual(jsi.call_function('x'), 10) - - """ # Unsupported - jsi = JSInterpreter(''' - function x() { - if (0!=0) return 1; - else {return 10} - }''') - self.assertEqual(jsi.call_function('x'), 10) - """ + } + ''', 10) def test_elseif(self): - jsi = JSInterpreter(''' - function x() { - if (0!=0) {return 1} - else if (1==0) {return 2} - else {return 10} - }''') - self.assertEqual(jsi.call_function('x'), 10) - - """ # Unsupported - jsi = JSInterpreter(''' - function x() { - if (0!=0) return 1; - else if (1==0) {return 2} - else {return 10} - }''') - self.assertEqual(jsi.call_function('x'), 10) - # etc - """ + self._test(''' + function f() { + if (0!=0) {return 1} + else if (1==0) {return 2} + else {return 10} + } + ''', 10) def test_for_loop(self): - # function x() { a=0; for (i=0; i-10; i++) {a++} a } - jsi = JSInterpreter(''' - function x() { a=0; for (i=0; i-10; i++) {a++} return a } - ''') - self.assertEqual(jsi.call_function('x'), 10) + self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10) def test_while_loop(self): - # function x() { a=0; while (a<10) {a++} a } - jsi = JSInterpreter(''' - function x() { a=0; while (a<10) {a++} return a } - ''') - self.assertEqual(jsi.call_function('x'), 10) + self._test('function f() { a=0; while (a<10) {a++} return a }', 10) def test_switch(self): jsi = JSInterpreter(''' - function x(f) { switch(f){ - case 1:f+=1; - case 2:f+=2; - case 3:f+=3;break; - case 4:f+=4; - default:f=0; - } return f } + function f(x) { switch(x){ + case 1:x+=1; + case 2:x+=2; + case 3:x+=3;break; + case 4:x+=4; + default:x=0; + } return x } ''') - self.assertEqual(jsi.call_function('x', 1), 7) - self.assertEqual(jsi.call_function('x', 3), 6) - self.assertEqual(jsi.call_function('x', 5), 0) + self._test(jsi, 7, args=[1]) + self._test(jsi, 6, args=[3]) + self._test(jsi, 0, args=[5]) def test_switch_default(self): jsi = JSInterpreter(''' - function x(f) { switch(f){ - case 2: f+=2; - default: f-=1; - case 5: - case 6: f+=6; - case 0: break; - case 1: f+=1; - } return f } + function f(x) { switch(x){ + case 2: x+=2; + default: x-=1; + case 5: 
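# Illustration, not part of the patch: the switch tests above depend on
# C-style fall-through. With x=1 execution enters at `case 1` and keeps
# running through `case 2` and `case 3` until the break, so 1+1+2+3 == 7;
# with x=5 nothing matches and `default` zeroes it. Assuming youtube_dl is
# importable:
from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter('''
    function f(x) { switch(x){
        case 1:x+=1;
        case 2:x+=2;
        case 3:x+=3;break;
        case 4:x+=4;
        default:x=0;
    } return x }
''')
print(jsi.call_function('f', 1))  # 7
print(jsi.call_function('f', 5))  # 0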
+ case 6: x+=6; + case 0: break; + case 1: x+=1; + } return x } ''') - self.assertEqual(jsi.call_function('x', 1), 2) - self.assertEqual(jsi.call_function('x', 5), 11) - self.assertEqual(jsi.call_function('x', 9), 14) + self._test(jsi, 2, args=[1]) + self._test(jsi, 11, args=[5]) + self._test(jsi, 14, args=[9]) def test_try(self): - jsi = JSInterpreter(''' - function x() { try{return 10} catch(e){return 5} } - ''') - self.assertEqual(jsi.call_function('x'), 10) + self._test('function f() { try{return 10} catch(e){return 5} }', 10) def test_catch(self): - jsi = JSInterpreter(''' - function x() { try{throw 10} catch(e){return 5} } - ''') - self.assertEqual(jsi.call_function('x'), 5) + self._test('function f() { try{throw 10} catch(e){return 5} }', 5) def test_finally(self): - jsi = JSInterpreter(''' - function x() { try{throw 10} finally {return 42} } - ''') - self.assertEqual(jsi.call_function('x'), 42) - jsi = JSInterpreter(''' - function x() { try{throw 10} catch(e){return 5} finally {return 42} } - ''') - self.assertEqual(jsi.call_function('x'), 42) + self._test('function f() { try{throw 10} finally {return 42} }', 42) + self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42) def test_nested_try(self): - jsi = JSInterpreter(''' - function x() {try { - try{throw 10} finally {throw 42} + self._test(''' + function f() {try { + try{throw 10} finally {throw 42} } catch(e){return 5} } - ''') - self.assertEqual(jsi.call_function('x'), 5) + ''', 5) def test_for_loop_continue(self): - jsi = JSInterpreter(''' - function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a } - ''') - self.assertEqual(jsi.call_function('x'), 0) + self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0) def test_for_loop_break(self): - jsi = JSInterpreter(''' - function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a } - ''') - self.assertEqual(jsi.call_function('x'), 0) + self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0) def test_for_loop_try(self): - jsi = JSInterpreter(''' - function x() { - for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} }; - return 42 } - ''') - self.assertEqual(jsi.call_function('x'), 42) + self._test(''' + function f() { + for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} }; + return 42 } + ''', 42) def test_literal_list(self): - jsi = JSInterpreter(''' - function x() { return [1, 2, "asdf", [5, 6, 7]][3] } - ''') - self.assertEqual(jsi.call_function('x'), [5, 6, 7]) + self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7]) def test_comma(self): - jsi = JSInterpreter(''' - function x() { a=5; a -= 1, a+=3; return a } - ''') - self.assertEqual(jsi.call_function('x'), 7) - jsi = JSInterpreter(''' - function x() { a=5; return (a -= 1, a+=3, a); } - ''') - self.assertEqual(jsi.call_function('x'), 7) - - jsi = JSInterpreter(''' - function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) } - ''') - self.assertEqual(jsi.call_function('x'), 5) + self._test('function f() { a=5; a -= 1, a+=3; return a }', 7) + self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7) + self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5) def test_void(self): - jsi = JSInterpreter(''' - function x() { return void 42; } - ''') - self.assertEqual(jsi.call_function('x'), None) + self._test('function f() { return void 42; }', None) def 
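# Illustration, not part of the patch: the finally tests encode the JS rule
# that a `return` inside `finally` overrides whatever the `try` body or the
# `catch` clause was about to return or raise. Assuming youtube_dl is
# importable:
from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter(
    'function f() { try{throw 10} catch(e){return 5} finally {return 42} }')
print(jsi.call_function('f'))  # 42, not 5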
test_return_function(self): jsi = JSInterpreter(''' @@ -387,110 +275,60 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('x')([]), 1) def test_null(self): - jsi = JSInterpreter(''' - function x() { return null; } - ''') - self.assertIs(jsi.call_function('x'), None) - - jsi = JSInterpreter(''' - function x() { return [null > 0, null < 0, null == 0, null === 0]; } - ''') - self.assertEqual(jsi.call_function('x'), [False, False, False, False]) - - jsi = JSInterpreter(''' - function x() { return [null >= 0, null <= 0]; } - ''') - self.assertEqual(jsi.call_function('x'), [True, True]) + self._test('function f() { return null; }', None) + self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }', + [False, False, False, False]) + self._test('function f() { return [null >= 0, null <= 0]; }', [True, True]) def test_undefined(self): - jsi = JSInterpreter(''' - function x() { return undefined === undefined; } - ''') - self.assertTrue(jsi.call_function('x')) + self._test('function f() { return undefined === undefined; }', True) + self._test('function f() { return undefined; }', JS_Undefined) + self._test('function f() {return undefined ?? 42; }', 42) + self._test('function f() { let v; return v; }', JS_Undefined) + self._test('function f() { let v; return v**0; }', 1) + self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }', + [False, False, JS_Undefined, JS_Undefined]) + + self._test(''' + function f() { return [ + undefined === undefined, + undefined == undefined, + undefined == null + ]; } + ''', [True] * 3) + self._test(''' + function f() { return [ + undefined < undefined, + undefined > undefined, + undefined === 0, + undefined == 0, + undefined < 0, + undefined > 0, + undefined >= 0, + undefined <= 0, + undefined > null, + undefined < null, + undefined === null + ]; } + ''', [False] * 11) jsi = JSInterpreter(''' - function x() { return undefined; } - ''') - self.assertIs(jsi.call_function('x'), JS_Undefined) - - jsi = JSInterpreter(''' - function x() { let v; return v; } - ''') - self.assertIs(jsi.call_function('x'), JS_Undefined) - - jsi = JSInterpreter(''' - function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; } - ''') - self.assertEqual(jsi.call_function('x'), [True, True, False, False]) - - jsi = JSInterpreter(''' - function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; } - ''') - self.assertEqual(jsi.call_function('x'), [False, False, False, False]) - - jsi = JSInterpreter(''' - function x() { return [undefined >= 0, undefined <= 0]; } - ''') - self.assertEqual(jsi.call_function('x'), [False, False]) - - jsi = JSInterpreter(''' - function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; } - ''') - self.assertEqual(jsi.call_function('x'), [False, False, True, False]) - - jsi = JSInterpreter(''' - function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; } - ''') - self.assertEqual(jsi.call_function('x'), [False, True, False, False]) - - jsi = JSInterpreter(''' - function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; } + function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; } ''') for y in jsi.call_function('x'): self.assertTrue(math.isnan(y)) - jsi = JSInterpreter(''' - function x() { let v; return v**0; } - ''') - self.assertEqual(jsi.call_function('x'), 1) - - jsi = JSInterpreter(''' - function x() { let v; return [v>42, 
v<=42, v&&42, 42&&v]; } - ''') - self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined]) - - jsi = JSInterpreter('function x(){return undefined ?? 42; }') - self.assertEqual(jsi.call_function('x'), 42) - def test_object(self): - jsi = JSInterpreter(''' - function x() { return {}; } - ''') - self.assertEqual(jsi.call_function('x'), {}) - - jsi = JSInterpreter(''' - function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; } - ''') - self.assertEqual(jsi.call_function('x'), [42, 0]) - - jsi = JSInterpreter(''' - function x() { let a; return a?.qq; } - ''') - self.assertIs(jsi.call_function('x'), JS_Undefined) - - jsi = JSInterpreter(''' - function x() { let a = {m1: 42, m2: 0 }; return a?.qq; } - ''') - self.assertIs(jsi.call_function('x'), JS_Undefined) + self._test('function f() { return {}; }', {}) + self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0]) + self._test('function f() { let a; return a?.qq; }', JS_Undefined) + self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined) def test_regex(self): - jsi = JSInterpreter(''' - function x() { let a=/,,[/,913,/](,)}/; } - ''') - self.assertIs(jsi.call_function('x'), None) + self._test('function f() { let a=/,,[/,913,/](,)}/; }', None) jsi = JSInterpreter(''' - function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; } + function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; } ''') attrs = set(('findall', 'finditer', 'match', 'scanner', 'search', 'split', 'sub', 'subn')) @@ -500,94 +338,65 @@ class TestJSInterpreter(unittest.TestCase): self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs) jsi = JSInterpreter(''' - function x() { let a=/,,[/,913,/](,)}/i; return a; } + function x() { let a=/,,[/,913,/](,)}/i; return a; } ''') self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) - jsi = JSInterpreter(r''' - function x() { let a="data-name".replace("data-", ""); return a } - ''') - self.assertEqual(jsi.call_function('x'), 'name') + jsi = JSInterpreter(r'function f() { let a=/,][}",],()}(\[)/; return a; }') + self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)') - jsi = JSInterpreter(r''' - function x() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; } - ''') - self.assertEqual(jsi.call_function('x'), 'name') + jsi = JSInterpreter(r'function f() { let a=[/[)\\]/]; return a[0]; }') + self.assertEqual(jsi.call_function('f').pattern, r'[)\\]') - jsi = JSInterpreter(r''' - function x() { let a="data-name".replace(/^.+-/, ""); return a; } - ''') - self.assertEqual(jsi.call_function('x'), 'name') - - jsi = JSInterpreter(r''' - function x() { let a="data-name".replace(/a/g, "o"); return a; } - ''') - self.assertEqual(jsi.call_function('x'), 'doto-nome') - - jsi = JSInterpreter(r''' - function x() { let a="data-name".replaceAll("a", "o"); return a; } - ''') - self.assertEqual(jsi.call_function('x'), 'doto-nome') - - jsi = JSInterpreter(r''' - function x() { let a=[/[)\\]/]; return a[0]; } - ''') - self.assertEqual(jsi.call_function('x').pattern, r'[)\\]') - - """ # fails - jsi = JSInterpreter(r''' - function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; } - ''') - self.assertEqual(jsi.call_function('x'), 5) - """ + def test_replace(self): + self._test('function f() { let a="data-name".replace("data-", ""); return a }', + 'name') + self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }', + 'name') + self._test('function f() { let 
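# Illustration, not part of the patch: JS regex literals come back from the
# interpreter as compiled Python patterns, with the JS `i` suffix mapped
# onto re.I (Python may OR in re.U on its own, which is why the test masks
# it out with ~re.U). Assuming youtube_dl is importable:
import re
from youtube_dl.jsinterp import JSInterpreter

rx = JSInterpreter(
    'function f() { let a=/,,[/,913,/](,)}/i; return a; }').call_function('f')
print(bool(rx.flags & re.I))  # True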
a="data-name".replace(/^.+-/, ""); return a; }', + 'name') + self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }', + 'doto-nome') + self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }', + 'doto-nome') def test_char_code_at(self): - jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}') - self.assertEqual(jsi.call_function('x', 0), 116) - self.assertEqual(jsi.call_function('x', 1), 101) - self.assertEqual(jsi.call_function('x', 2), 115) - self.assertEqual(jsi.call_function('x', 3), 116) - self.assertEqual(jsi.call_function('x', 4), None) - self.assertEqual(jsi.call_function('x', 'not_a_number'), 116) + jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}') + self._test(jsi, 116, args=[0]) + self._test(jsi, 101, args=[1]) + self._test(jsi, 115, args=[2]) + self._test(jsi, 116, args=[3]) + self._test(jsi, None, args=[4]) + self._test(jsi, 116, args=['not_a_number']) def test_bitwise_operators_overflow(self): - jsi = JSInterpreter('function x(){return -524999584 << 5}') - self.assertEqual(jsi.call_function('x'), 379882496) + self._test('function f(){return -524999584 << 5}', 379882496) + self._test('function f(){return 1236566549 << 5}', 915423904) - jsi = JSInterpreter('function x(){return 1236566549 << 5}') - self.assertEqual(jsi.call_function('x'), 915423904) + def test_bitwise_operators_typecast(self): + # madness + self._test('function f(){return null << 5}', 0) + self._test('function f(){return undefined >> 5}', 0) + self._test('function f(){return 42 << NaN}', 42) + self._test('function f(){return 42 << Infinity}', 42) - def test_bitwise_operators_madness(self): - jsi = JSInterpreter('function x(){return null << 5}') - self.assertEqual(jsi.call_function('x'), 0) - - jsi = JSInterpreter('function x(){return undefined >> 5}') - self.assertEqual(jsi.call_function('x'), 0) - - jsi = JSInterpreter('function x(){return 42 << NaN}') - self.assertEqual(jsi.call_function('x'), 42) - - jsi = JSInterpreter('function x(){return 42 << Infinity}') - self.assertEqual(jsi.call_function('x'), 42) + def test_negative(self): + self._test('function f(){return 2 * -2.0 ;}', -4) + self._test('function f(){return 2 - - -2 ;}', 0) + self._test('function f(){return 2 - - - -2 ;}', 4) + self._test('function f(){return 2 - + + - -2;}', 0) + self._test('function f(){return 2 + - + - -2;}', 0) def test_32066(self): - jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}") - self.assertEqual(jsi.call_function('x'), 70) + self._test( + "function f(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}", + 70) - def test_unary_operators(self): - jsi = JSInterpreter('function f(){return 2 - - - 2;}') - self.assertEqual(jsi.call_function('f'), 0) - jsi = JSInterpreter('function f(){return 2 + - + - - 2;}') - self.assertEqual(jsi.call_function('f'), 0) - # https://github.com/ytdl-org/youtube-dl/issues/32815 - jsi = JSInterpreter('function f(){return 0 - 7 * - 6;}') - self.assertEqual(jsi.call_function('f'), 42) - - """ # fails so far + @unittest.skip('Not yet working') def test_packed(self): - jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''') - self.assertEqual(jsi.call_function('x', '''h 
7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 
9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))) - """ + self._test( + '''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''', + '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 
3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')) if __name__ == '__main__': diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 02adf6678..799497acb 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -20,7 +20,9 @@ from .compat import ( compat_basestring, compat_chr, compat_collections_chain_map as ChainMap, + compat_filter as filter, compat_itertools_zip_longest as zip_longest, + compat_map as map, compat_str, ) @@ -252,7 +254,7 @@ class Debugger(object): cls.write('=> Raises:', 
e, '<-|', stmt, level=allow_recursion) raise if cls.ENABLED and stmt.strip(): - if should_ret or not repr(ret) == stmt: + if should_ret or repr(ret) != stmt: cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion) return ret, should_ret return interpret_statement @@ -365,6 +367,8 @@ class JSInterpreter(object): start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 in_quote, escaping, after_op, in_regex_char_group = None, False, True, False skipping = 0 + if skip_delims: + skip_delims = variadic(skip_delims) for idx, char in enumerate(expr): paren_delta = 0 if not in_quote: @@ -391,7 +395,7 @@ class JSInterpreter(object): continue elif pos == 0 and skip_delims: here = expr[idx:] - for s in variadic(skip_delims): + for s in skip_delims: if here.startswith(s) and s: skipping = len(s) - 1 break @@ -412,7 +416,6 @@ class JSInterpreter(object): if delim is None: delim = expr and _MATCHING_PARENS[expr[0]] separated = list(cls._separate(expr, delim, 1)) - if len(separated) < 2: raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals())) return separated[0][1:].strip(), separated[1].strip() @@ -487,6 +490,7 @@ class JSInterpreter(object): # fails on (eg) if (...) stmt1; else stmt2; sub_statements = list(self._separate(stmt, ';')) or [''] expr = stmt = sub_statements.pop().strip() + for sub_stmt in sub_statements: ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion) if should_return: @@ -626,8 +630,7 @@ class JSInterpreter(object): if m.group('err'): catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err catch_vars = local_vars.new_child(m=catch_vars) - err = None - pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion) + err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion) m = self._FINALLY_RE.match(expr) if m: @@ -877,12 +880,12 @@ class JSInterpreter(object): self.interpret_expression(v, local_vars, allow_recursion) for v in self._separate(arg_str)] - if obj == compat_str: + if obj is compat_str: if member == 'fromCharCode': assertion(argvals, 'takes one or more arguments') return ''.join(map(compat_chr, argvals)) raise self.Exception('Unsupported string method ' + member, expr=expr) - elif obj == float: + elif obj is float: if member == 'pow': assertion(len(argvals) == 2, 'takes two arguments') return argvals[0] ** argvals[1] @@ -907,12 +910,12 @@ class JSInterpreter(object): elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') - index, howMany = map(int, (argvals + [len(obj)])[:2]) + index, how_many = map(int, (argvals + [len(obj)])[:2]) if index < 0: index += len(obj) add_items = argvals[2:] res = [] - for i in range(index, min(index + howMany, len(obj))): + for _ in range(index, min(index + how_many, len(obj))): res.append(obj.pop(index)) for i, item in enumerate(add_items): obj.insert(index + i, item) @@ -1002,28 +1005,25 @@ class JSInterpreter(object): def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' obj = {} - fields = None - for obj_m in re.finditer( + fields = next(filter(None, ( + obj_m.group('fields') for obj_m in re.finditer( r'''(?xs) {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) }}\s*; '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), - self.code): - fields = obj_m.group('fields') - if fields: - break - else: + self.code))), 
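# Illustration, not part of the patch: the extract_object rewrite here swaps
# a for/else scan for the first-truthy-match idiom next(filter(None, gen),
# None); the compat_filter import added above keeps that lazy and
# next()-compatible on Python 2 as well. The same idiom in isolation
# (Python 3 shown), with a toy pattern and object name:
import re

code = 'var xx = { a: function(x){ return x; } };'
fields = next(filter(None, (
    m.group('fields') for m in re.finditer(
        r'xx\s*=\s*\{\s*(?P<fields>.*?)\s*\}\s*;', code))), None)
print(fields)  # -> a: function(x){ return x; }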
None) + if not fields: raise self.Exception('Could not find object ' + objname) # Currently, it only supports function definitions - fields_m = re.finditer( - r'''(?x) - (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)} - ''' % (_FUNC_NAME_RE, _NAME_RE), - fields) - for f in fields_m: + for f in re.finditer( + r'''(?x) + (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)} + ''' % (_FUNC_NAME_RE, _NAME_RE), + fields): argnames = self.build_arglist(f.group('args')) - obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code')) + name = remove_quotes(f.group('key')) + obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name)) return obj @@ -1058,7 +1058,7 @@ class JSInterpreter(object): def extract_function(self, funcname): return function_with_repr( self.extract_function_from_code(*self.extract_function_code(funcname)), - 'F<%s>' % (funcname, )) + 'F<%s>' % (funcname,)) def extract_function_from_code(self, argnames, code, *global_stack): local_vars = {} @@ -1067,7 +1067,7 @@ class JSInterpreter(object): if mobj is None: break start, body_start = mobj.span() - body, remaining = self._separate_at_paren(code[body_start - 1:], '}') + body, remaining = self._separate_at_paren(code[body_start - 1:]) name = self._named_object(local_vars, self.extract_function_from_code( [x.strip() for x in mobj.group('args').split(',')], body, local_vars, *global_stack)) @@ -1095,8 +1095,7 @@ class JSInterpreter(object): argnames = tuple(argnames) def resf(args, kwargs={}, allow_recursion=100): - global_stack[0].update( - zip_longest(argnames, args, fillvalue=None)) + global_stack[0].update(zip_longest(argnames, args, fillvalue=None)) global_stack[0].update(kwargs) var_stack = LocalNameSpace(*global_stack) ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1) From 76ac69917ec76ba663da843795f46916831e6da9 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 10 Jul 2024 18:02:11 +0100 Subject: [PATCH 41/48] [jsinterp] Further improve expression parsing (fix fd8242e) Passes tests from yt-dlp --- youtube_dl/jsinterp.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 799497acb..a2074a91e 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -804,16 +804,19 @@ class JSInterpreter(object): if op in ('+', '-'): # simplify/adjust consecutive instances of these operators undone = 0 - while len(separated) > 1 and not separated[-1].strip(): + separated = [s.strip() for s in separated] + while len(separated) > 1 and not separated[-1]: undone += 1 separated.pop() if op == '-' and undone % 2 != 0: right_expr = op + right_expr elif op == '+': - while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS: + while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS: + right_expr = separated.pop() + right_expr + if separated[-1][-1:] in self.OP_CHARS: right_expr = separated.pop() + right_expr # hanging op at end of left => unary + (strip) or - (push right) - left_val = separated[-1] + left_val = separated[-1] if separated else '' for dm_op in ('*', '%', '/', '**'): bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) if len(bodmas) > 1 and not bodmas[-1].strip(): From d35ce6ce95aac9b98c5f8272824a82e4623b777a Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 10 Jul 2024 18:07:20 +0100 Subject: [PATCH 42/48] [jsinterp] 
Support functionality for player `b22ef6e7` * support `prototype` for call() and apply() (yt-dlp/yt-dlp#10392, thx Grub4k) * map JS `Array` to `list` --- test/test_jsinterp.py | 27 +++++++++++++++++++++++++++ youtube_dl/jsinterp.py | 24 +++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 86d7f0d46..104e766be 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -398,6 +398,33 @@ class TestJSInterpreter(unittest.TestCase): '''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 
9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')) + def test_join(self): + test_input = list('test') + tests = [ + 'function f(a, b){return a.join(b)}', + 'function f(a, b){return Array.prototype.join.call(a, b)}', + 'function f(a, b){return Array.prototype.join.apply(a, [b])}', + ] + for test in tests: + jsi = JSInterpreter(test) + self._test(jsi, 'test', args=[test_input, '']) + self._test(jsi, 't-e-s-t', args=[test_input, '-']) + self._test(jsi, '', args=[[], '-']) + + def test_split(self): + test_result = list('test') + tests = [ + 'function f(a, b){return a.split(b)}', + 'function f(a, b){return String.prototype.split.call(a, b)}', + 'function f(a, b){return String.prototype.split.apply(a, [b])}', + ] + for test in tests: + jsi = JSInterpreter(test) + self._test(jsi, test_result, args=['test', '']) + self._test(jsi, test_result, args=['t-e-s-t', '-']) + self._test(jsi, [''], args=['', '-']) + self._test(jsi, [], args=['', '']) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index a2074a91e..949f77775 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -850,7 +850,7 @@ class JSInterpreter(object): memb = member raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr) - def eval_method(): + def eval_method(variable, member): if (variable, member) == ('console', 'debug'): if Debugger.ENABLED: Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion)) @@ 
-858,6 +858,7 @@ class JSInterpreter(object): types = { 'String': compat_str, 'Math': float, + 'Array': list, } obj = local_vars.get(variable) if obj in (JS_Undefined, None): @@ -883,6 +884,23 @@ class JSInterpreter(object): self.interpret_expression(v, local_vars, allow_recursion) for v in self._separate(arg_str)] + # Fixup prototype call + if isinstance(obj, type): + new_member, rest = member.partition('.')[0::2] + if new_member == 'prototype': + new_member, func_prototype = rest.partition('.')[0::2] + assertion(argvals, 'takes one or more arguments') + assertion(isinstance(argvals[0], obj), 'must bind to type {0}'.format(obj)) + if func_prototype == 'call': + obj = argvals.pop(0) + elif func_prototype == 'apply': + assertion(len(argvals) == 2, 'takes two arguments') + obj, argvals = argvals + assertion(isinstance(argvals, list), 'second argument must be a list') + else: + raise self.Exception('Unsupported Function method ' + func_prototype, expr) + member = new_member + if obj is compat_str: if member == 'fromCharCode': assertion(argvals, 'takes one or more arguments') @@ -976,11 +994,11 @@ class JSInterpreter(object): if remaining: ret, should_abort = self.interpret_statement( - self._named_object(local_vars, eval_method()) + remaining, + self._named_object(local_vars, eval_method(variable, member)) + remaining, local_vars, allow_recursion) return ret, should_return or should_abort else: - return eval_method(), should_return + return eval_method(variable, member), should_return elif md.get('function'): fname = m.group('fname') From 16f5bbc464602773e61eeafef51d1dbc47987bb4 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 10 Jul 2024 18:20:59 +0100 Subject: [PATCH 43/48] [YouTube] Fix nsig processing for player `b22ef6e7` * improve extraction of function name (like yt-dlp/yt-dlp#10390) * always use JSInterp to extract function code (yt-dlp/yt-dlp#10396, thx seproDev, pukkandan) --- test/test_youtube_signature.py | 4 ++++ youtube_dl/extractor/youtube.py | 24 ++++++++++-------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index cafba7a5c..cc18d0f7b 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -162,6 +162,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js', '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg', ), + ( + 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', + 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', + ), ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 90c16e172..2e31a8979 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1636,7 +1636,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url) except ExtractorError as e: - raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e) + raise ExtractorError('Unable to extract nsig function code', cause=e) if self.get_param('youtube_print_sig_code'): self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format( player_id, func_code[1])) @@ -1658,8 +1658,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): func_name, idx = self._search_regex( - r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)', - jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) + # new: 
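# Illustration, not part of the patch: with the prototype fixup above, a
# dispatch through the type object such as Array.prototype.join.call(a, b)
# is rewritten so that the first argument becomes the receiver, i.e. it
# behaves like a.join(b) (and .apply unpacks its single list argument the
# same way). Assuming youtube_dl is importable:
from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter('function f(a, b){return Array.prototype.join.call(a, b)}')
print(jsi.call_function('f', list('test'), '-'))  # -> t-e-s-t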
(b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) + # old: .get("n"))&&(b=nfunc[idx](b) + # older: .get("n"))&&(b=nfunc(b) + r'''(?x) + (?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? + \.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* + (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) + ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) if not idx: return func_name @@ -1679,17 +1685,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): func_name = self._extract_n_function_name(jscode) - # For redundancy - func_code = self._search_regex( - r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s* - # NB: The end of the regex is intentionally kept strict - {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name, - jscode, 'nsig function', group=('var', 'code'), default=None) - if func_code: - func_code = ([func_code[0]], func_code[1]) - else: - self.write_debug('Extracting nsig function with jsinterp') - func_code = jsi.extract_function_code(func_name) + func_code = jsi.extract_function_code(func_name) self.cache.store('youtube-nsig', player_id, func_code) return jsi, player_id, func_code From 451046d62a1b57cc473625c728ff89d7a66933c3 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 24 Jul 2024 14:33:34 +0100 Subject: [PATCH 44/48] [Youtube] Make n-sig throttling diagnostic up-to-date --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2e31a8979..bb9f48e4c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1647,7 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): except JSInterpreter.Exception as e: self.report_warning( '%s (%s %s)' % ( - 'Unable to decode n-parameter: download likely to be throttled', + 'Unable to decode n-parameter: expect download to be blocked or throttled', error_to_compat_str(e), traceback.format_exc()), video_id=video_id) @@ -1670,7 +1670,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return func_name return self._parse_json(self._search_regex( - r'var {0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode, + r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode, 'Initial JS player n function list ({0}.{1})'.format(func_name, idx)), func_name, transform_source=js_to_json)[int(idx)] From e1b3fa242cb94eb9dc949ab3f2cace91c46f11bf Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 24 Jul 2024 14:45:52 +0100 Subject: [PATCH 45/48] [Youtube] Find `n` function name in player `3400486c` Fixes #32877 --- test/test_youtube_signature.py | 8 ++++++++ youtube_dl/extractor/youtube.py | 8 ++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index cc18d0f7b..5b4aa3aa0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -166,6 +166,14 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', ), + ( + 'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js', + 'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl', + ), + ( + 'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js', + '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA', + ), ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 
From e1b3fa242cb94eb9dc949ab3f2cace91c46f11bf Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 24 Jul 2024 14:45:52 +0100
Subject: [PATCH 45/48] [Youtube] Find `n` function name in player `3400486c`

Fixes #32877

---
 test/test_youtube_signature.py  | 8 ++++++++
 youtube_dl/extractor/youtube.py | 8 ++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index cc18d0f7b..5b4aa3aa0 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -166,6 +166,14 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
         'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
     ),
+    (
+        'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
+        'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
+    ),
+    (
+        'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
+        '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
+    ),
 ]

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index bb9f48e4c..84371ff06 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1659,11 +1659,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _extract_n_function_name(self, jscode):
         func_name, idx = self._search_regex(
             # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
+            # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s
             # old: .get("n"))&&(b=nfunc[idx](b)
             # older: .get("n"))&&(b=nfunc(b)
             r'''(?x)
-            (?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
-            \.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
+            (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
+                String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
+                "n+"\[\s*\+?s*[\w$.]+\s*]
+            )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
+            \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
             (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
             ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
         if not idx:
             return func_name
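On the new first alternative: players like `3400486c` no longer write `a.get("n")` literally but derive the key as `"nn"[+a.D]` (either index, 0 or 1, yields "n"), and the map key itself may be "n" or "nn", hence the `n{1,2}` above. A quick check that the added branch recognises that shape, using hypothetical identifiers:

    import re

    sample = '(b="nn"[+a.D],c=a.get(b))&&(c=mDa[0](c),a.set(b,c))'
    m = re.search(r'\(\s*(?P<b>[a-z])\s*=\s*"n+"\[\s*\+?s*[\w$.]+\s*]', sample)
    print(m.group(0))  # -> (b="nn"[+a.D]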
From 71223bff39551a11b6959a3de2dd9e2f070f3c4f Mon Sep 17 00:00:00 2001
From: Aiur Adept <151766879+aiur-adept@users.noreply.github.com>
Date: Thu, 1 Aug 2024 14:18:34 -0400
Subject: [PATCH 46/48] [Youtube] Fix nsig extraction for player 20dfca59 (#32891)

* dirkf's patch for nsig extraction
* add generic search per yt-dlp/yt-dlp/pull/10611 - thx bashonly

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 test/test_youtube_signature.py  |  4 ++++
 youtube_dl/extractor/youtube.py | 32 ++++++++++++++++++++++++++------
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 5b4aa3aa0..1c5f667f5 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -174,6 +174,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
         '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
     ),
+    (
+        'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
+        '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
+    ),
 ]

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 84371ff06..509e374a4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1659,18 +1659,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _extract_n_function_name(self, jscode):
         func_name, idx = self._search_regex(
             # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
-            # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s
+            # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
+            # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
             # old: .get("n"))&&(b=nfunc[idx](b)
             # older: .get("n"))&&(b=nfunc(b)
             r'''(?x)
-            (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
+            (?:\((?:[\w$()\s]+,)*?\s*(?P<b>[a-z])\s*=\s*(?:
                 String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
-                "n+"\[\s*\+?s*[\w$.]+\s*]
-            )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
-            \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
+                "n+"\[\s*\+?s*[\w$.]+\s*]|
+                (?P<b1>(?:[\w$]+\s*\.\s*)+n\b(?:(?!&&).)+\))
+            )\s*
+            (?(b1)
+                &&\s*\(\s*(?P=b)|
+                (?:
+                    ,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
+                    \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*
+                    &&\s*\(\s*(?(c)(?P=c)|(?P=b))
+                )
+            )\s*=\s*
             (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
-            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
+            default=(None, None))
+        # thx bashonly: yt-dlp/yt-dlp/pull/10611
+        if not func_name:
+            self.report_warning('Falling back to generic n function search')
+            return self._search_regex(
+                r'''(?xs)
+                    (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
+                    (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
+                    \s*\{(?:(?!};).)+?["']enhanced_except_
+                ''', jscode, 'Initial JS player n function name', group='name')
         if not idx:
+            self.report_warning('Falling back to generic n function search')
             return func_name

From dbc08fba83a0e6c5215e8cec97eb4076f85eee5f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 6 Aug 2024 20:44:30 +0100
Subject: [PATCH 47/48] [jsinterp] Improve slice implementation for player
 b12cc44b

Partly taken from yt-dlp/yt-dlp#10664, thx seproDev

Fixes #32896

---
 test/test_jsinterp.py          | 28 ++++++++++++++++++++++++++++
 test/test_youtube_signature.py |  4 ++++
 youtube_dl/jsinterp.py         | 13 ++++++++++---
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 104e766be..c7a4f2cbf 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -425,6 +425,34 @@ class TestJSInterpreter(unittest.TestCase):
         self._test(jsi, [''], args=['', '-'])
         self._test(jsi, [], args=['', ''])

+    def test_slice(self):
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
+        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
+        self._test('function f(){return "012345678".slice()}', '012345678')
+        self._test('function f(){return "012345678".slice(0)}', '012345678')
+        self._test('function f(){return "012345678".slice(5)}', '5678')
+        self._test('function f(){return "012345678".slice(99)}', '')
+        self._test('function f(){return "012345678".slice(-2)}', '78')
+        self._test('function f(){return "012345678".slice(-99)}', '012345678')
+        self._test('function f(){return "012345678".slice(0, 0)}', '')
+        self._test('function f(){return "012345678".slice(1, 0)}', '')
+        self._test('function f(){return "012345678".slice(0, 1)}', '0')
+        self._test('function f(){return "012345678".slice(3, 6)}', '345')
+        self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
+        self._test('function f(){return "012345678".slice(-1, 1)}', '')
+        self._test('function f(){return "012345678".slice(-3, -1)}', '67')
+

 if __name__ == '__main__':
     unittest.main()
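These cases all reduce to Python slicing once missing arguments are padded with None, which is what the jsinterp change later in this patch does (an illustrative sketch of the mapping, not the interpreter code itself):

    # JS Array/String .slice() maps onto Python's slice() object:
    #   .slice()     -> obj[slice(None)]    i.e. obj[:]
    #   .slice(n)    -> obj[slice(n, None)] i.e. obj[n:]
    #   .slice(m, n) -> obj[slice(m, n)]    i.e. obj[m:n]
    for argvals in ([], [5], [1, -1]):
        if len(argvals) < 2:
            argvals += (None,)  # pad as in the patch; slice(None) == [:]
        print('012345678'[slice(*argvals)])  # -> 012345678, 5678, 1234567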
"012345678".slice(1, -1)}', '1234567') + self._test('function f(){return "012345678".slice(-1, 1)}', '') + self._test('function f(){return "012345678".slice(-3, -1)}', '67') + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 1c5f667f5..56e92fac5 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -178,6 +178,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js', '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw', ), + ( + 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', + 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 949f77775..a616ad070 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -925,9 +925,16 @@ class JSInterpreter(object): obj.reverse() return obj elif member == 'slice': - assertion(isinstance(obj, list), 'must be applied on a list') - assertion(len(argvals) == 1, 'takes exactly one argument') - return obj[argvals[0]:] + assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string') + # From [1]: + # .slice() - like [:] + # .slice(n) - like [n:] (not [slice(n)] + # .slice(m, n) - like [m:n] or [slice(m, n)] + # [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice + assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments') + if len(argvals) < 2: + argvals += (None,) + return obj[slice(*argvals)] elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') From c5098961b04ce83f4615f2a846c84f803b072639 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 6 Aug 2024 20:59:09 +0100 Subject: [PATCH 48/48] [Youtube] Rework n function extraction pattern Now also succeeds with player b12cc44b --- youtube_dl/extractor/youtube.py | 43 ++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 509e374a4..6fe520e9a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1661,23 +1661,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c) # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b) - # old: .get("n"))&&(b=nfunc[idx](b) - # older: .get("n"))&&(b=nfunc(b) + # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") + # old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s* + # older: (b=a.get("n"))&&(b=nfunc(b) r'''(?x) - (?:\((?:[\w$()\s]+,)*?\s*(?P<b>[a-z])\s*=\s*(?: - String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| - "n+"\[\s*\+?s*[\w$.]+\s*]| - (?P<b1>(?:[\w$]+\s*\.\s*)+n\b(?:(?!&&).)+\)) - )\s* - (?(b1) - &&\s*\(\s*(?P=b)| - (?: - ,(?P<c>[a-z])\s*=\s*[a-z]\s*)? - \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s* - &&\s*\(\s*(?(c)(?P=c)|(?P=b)) - ) - )\s*=\s* - (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) + \((?:[\w$()\s]+,)*?\s* # ( + (?P<b>[a-z])\s*=\s* # b= + (?: + (?: # expect ,c=a.get(b) (etc) + String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| + "n+"\[\s*\+?s*[\w$.]+\s*] + )\s*(?:,[\w$()\s]+(?=,))*| + (?P<old>[\w$]+) # a (old[er]) + )\s* + (?(old) + # b.get("n") + (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? 
From c5098961b04ce83f4615f2a846c84f803b072639 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 6 Aug 2024 20:59:09 +0100
Subject: [PATCH 48/48] [Youtube] Rework n function extraction pattern

Now also succeeds with player b12cc44b

---
 youtube_dl/extractor/youtube.py | 43 ++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 509e374a4..6fe520e9a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1661,23 +1661,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
             # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
             # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
-            # old: .get("n"))&&(b=nfunc[idx](b)
-            # older: .get("n"))&&(b=nfunc(b)
+            # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+            # old: (b=a.get("n"))&&(b=nfunc[idx](b)
+            # older: (b=a.get("n"))&&(b=nfunc(b)
             r'''(?x)
-            (?:\((?:[\w$()\s]+,)*?\s*(?P<b>[a-z])\s*=\s*(?:
-                String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
-                "n+"\[\s*\+?s*[\w$.]+\s*]|
-                (?P<b1>(?:[\w$]+\s*\.\s*)+n\b(?:(?!&&).)+\))
-            )\s*
-            (?(b1)
-                &&\s*\(\s*(?P=b)|
-                (?:
-                    ,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
-                    \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*
-                    &&\s*\(\s*(?(c)(?P=c)|(?P=b))
-                )
-            )\s*=\s*
-            (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
+                \((?:[\w$()\s]+,)*?\s*      # (
+                (?P<b>[a-z])\s*=\s*         # b=
+                (?:
+                    (?:                     # expect ,c=a.get(b) (etc)
+                        String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
+                        "n+"\[\s*\+?s*[\w$.]+\s*]
+                    )\s*(?:,[\w$()\s]+(?=,))*|
+                    (?P<old>[\w$]+)         # a (old[er])
+                )\s*
+                (?(old)
+                    # b.get("n")
+                    (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
+                    (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
+                |                           # ,c=a.get(b)
+                    ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
+                    (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
+                    (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
+                )
+                # interstitial junk
+                \s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
+                (?(c)(?P=c)|(?P=b))\s*=\s*  # [c|b]=
+                # nfunc|nfunc[idx]
+                (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
             ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
             default=(None, None))
         # thx bashonly: yt-dlp/yt-dlp/pull/10611
         if not func_name:
             self.report_warning('Falling back to generic n function search')
             return self._search_regex(
                 r'''(?xs)
                     (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
                     (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
                     \s*\{(?:(?!};).)+?["']enhanced_except_
                 ''', jscode, 'Initial JS player n function name', group='name')
         if not idx:
-            self.report_warning('Falling back to generic n function search')
             return func_name

         return self._parse_json(self._search_regex(