From 735e87adfc44b284dcdb4d9a0155ce0616e3af97 Mon Sep 17 00:00:00 2001 From: Gabriel Nagy Date: Thu, 13 Apr 2023 01:40:38 +0300 Subject: [PATCH 1/4] [core] Sanitize info dict before dumping JSON (fixes fe7e130) (#32032) * follow up to fe7e130 which didn't fix everything. Co-authored-by: dirkf --- youtube_dl/YoutubeDL.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2719d546f..117f1c513 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1777,7 +1777,7 @@ class YoutubeDL(object): self.to_stdout(formatSeconds(info_dict['duration'])) print_mandatory('format') if self.params.get('forcejson', False): - self.to_stdout(json.dumps(info_dict)) + self.to_stdout(json.dumps(self.sanitize_info(info_dict))) def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -2091,7 +2091,7 @@ class YoutubeDL(object): raise else: if self.params.get('dump_single_json', False): - self.to_stdout(json.dumps(res)) + self.to_stdout(json.dumps(self.sanitize_info(res))) return self._download_retcode @@ -2100,6 +2100,7 @@ class YoutubeDL(object): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load + # TODO: let's use io.open(), then info = self.filter_requested_info(json.loads('\n'.join(f))) try: self.process_ie_result(info, download=True) From 2da3fa04a68ff0652f49d6874d82b7a0edb85ea3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 11 Apr 2023 17:36:27 +0100 Subject: [PATCH 2/4] [YouTube] Simplify signature patterns --- youtube_dl/extractor/youtube.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ae3416b20..80fff7ada 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -19,6 +19,7 @@ from ..compat import ( compat_urllib_parse_parse_qs as compat_parse_qs, 
compat_urllib_parse_unquote_plus, compat_urllib_parse_urlparse, + compat_zip as zip, ) from ..jsinterp import JSInterpreter from ..utils import ( @@ -1555,17 +1556,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)', - r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)', - r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', + r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns - r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') From 26035bde46c0acc30dc053618451d9aeca4b7709 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 13 Apr 2023 00:15:07 +0100 Subject: [PATCH 3/4] [DashSegmentsFD] Correctly detect errors when `fragment_retries` == 0 * use the success flag instead of the retry count * establish the fragment_url outside the retry loop * only report skipping 
a fragment once. * resolves #32033 --- youtube_dl/downloader/dash.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 67a8e173f..2800d4260 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -38,12 +38,13 @@ class DashSegmentsFD(FragmentFD): # In DASH, the first segment contains necessary headers to # generate a valid MP4 file, so always abort for the first segment fatal = frag_index == 1 or not skip_unavailable_fragments + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + success = False for count in itertools.count(): try: - fragment_url = fragment.get('url') - if not fragment_url: - assert fragment_base_url - fragment_url = urljoin(fragment_base_url, fragment['path']) success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) if not success: return False @@ -63,14 +64,13 @@ class DashSegmentsFD(FragmentFD): # itself since it has its own retry settings if fatal: raise - self.report_skip_fragment(frag_index) break - if count >= fragment_retries: + if not success: if not fatal: self.report_skip_fragment(frag_index) continue - self.report_error('giving up after %s fragment retries' % fragment_retries) + self.report_error('giving up after %s fragment retries' % count) return False self._finish_frag_download(ctx) From 211cbfd5d46025a8e4d8f9f3d424aaada4698974 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 21 Apr 2023 14:04:30 +0100 Subject: [PATCH 4/4] [jsinterp] Minimally handle arithmetic operator precedence Resolves #32066 --- test/test_jsinterp.py | 11 +++++++++++ youtube_dl/jsinterp.py | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 5d129433d..e121358d7 100644 --- a/test/test_jsinterp.py +++ 
b/test/test_jsinterp.py @@ -505,6 +505,17 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function x(){return 1236566549 << 5}') self.assertEqual(jsi.call_function('x'), 915423904) + def test_32066(self): + jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}") + self.assertEqual(jsi.call_function('x'), 70) + + def test_unary_operators(self): + jsi = JSInterpreter('function f(){return 2 - - - 2;}') + self.assertEqual(jsi.call_function('f'), 0) + # fails + # jsi = JSInterpreter('function f(){return 2 + - + - - 2;}') + # self.assertEqual(jsi.call_function('f'), 0) + """ # fails so far def test_packed(self): jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''') diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index ab7d6f926..a06fc4ff5 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +from functools import update_wrapper import itertools import json import math @@ -23,11 +24,23 @@ from .compat import ( ) +def wraps_op(op): + + def update_and_rename_wrapper(w): + f = update_wrapper(w, op) + # fn names are str in both Py 2/3 + f.__name__ = str('JS_') + f.__name__ + return f + + return update_and_rename_wrapper + + def _js_bit_op(op): def zeroise(x): return 0 if x in (None, JS_Undefined) else x + @wraps_op(op) def wrapped(a, b): return op(zeroise(a), zeroise(b)) & 0xffffffff @@ -36,6 +49,7 @@ def _js_bit_op(op): def _js_arith_op(op): + @wraps_op(op) def wrapped(a, b): if JS_Undefined in (a, b): return float('nan') @@ -66,6 +80,7 @@ def _js_exp(a, b): def _js_eq_op(op): + @wraps_op(op) def wrapped(a, b): if set((a, b)) <= set((None, JS_Undefined)): return op(a, a) @@ -76,6 +91,7 @@ def _js_eq_op(op): def _js_comp_op(op): + @wraps_op(op) def wrapped(a, b): if JS_Undefined in (a, b): return False @@ -356,6 +372,7 @@ 
class JSInterpreter(object): return right_val try: + # print('Eval:', opfunc.__name__, left_val, right_val) return opfunc(left_val, right_val) except Exception as e: raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e) @@ -395,6 +412,7 @@ class JSInterpreter(object): raise self.Exception('Recursion limit reached') allow_recursion -= 1 + # print('At: ' + stmt[:60]) should_return = False # fails on (eg) if (...) stmt1; else stmt2; sub_statements = list(self._separate(stmt, ';')) or [''] @@ -702,9 +720,24 @@ class JSInterpreter(object): continue right_expr = separated.pop() - while op == '-' and len(separated) > 1 and not separated[-1].strip(): - right_expr = '-' + right_expr - separated.pop() + # handle operators that are both unary and binary, minimal BODMAS + if op in ('+', '-'): + undone = 0 + while len(separated) > 1 and not separated[-1].strip(): + undone += 1 + separated.pop() + if op == '-' and undone % 2 != 0: + right_expr = op + right_expr + left_val = separated[-1] + for dm_op in ('*', '%', '/', '**'): + bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) + if len(bodmas) > 1 and not bodmas[-1].strip(): + expr = op.join(separated) + op + right_expr + right_expr = None + break + if right_expr is None: + continue + left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion) return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return @@ -955,6 +988,7 @@ class JSInterpreter(object): def build_function(self, argnames, code, *global_stack): global_stack = list(global_stack) or [{}] argnames = tuple(argnames) + # import pdb; pdb.set_trace() def resf(args, kwargs={}, allow_recursion=100): global_stack[0].update(