mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-21 17:51:51 +00:00
[jsinterp] Handle new YT players 113ca41c, c57c113c
* add NaN
* allow any white-space character for `after_op`
* align with yt-dlp f26af78a8ac11d9d617ed31ea5282cfaa5bcbcfa (charCodeAt and bitwise overflow)
* allow escaping in regex, fixing player c57c113c
This commit is contained in:
parent
4050e10a4c
commit
55c823634d
3 changed files with 65 additions and 18 deletions
|
@ -135,6 +135,11 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
||||||
|
|
||||||
def test_builtins(self):
|
def test_builtins(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { return NaN }
|
||||||
|
''')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('x')))
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
|
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
|
||||||
''')
|
''')
|
||||||
|
@ -385,6 +390,22 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
||||||
|
|
||||||
|
def test_char_code_at(self):
|
||||||
|
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
||||||
|
self.assertEqual(jsi.call_function('x', 0), 116)
|
||||||
|
self.assertEqual(jsi.call_function('x', 1), 101)
|
||||||
|
self.assertEqual(jsi.call_function('x', 2), 115)
|
||||||
|
self.assertEqual(jsi.call_function('x', 3), 116)
|
||||||
|
self.assertEqual(jsi.call_function('x', 4), None)
|
||||||
|
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
|
||||||
|
|
||||||
|
def test_bitwise_operators_overflow(self):
|
||||||
|
jsi = JSInterpreter('function x(){return -524999584 << 5}')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 379882496)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 915423904)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -111,10 +111,26 @@ _NSIG_TESTS = [
|
||||||
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
|
||||||
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
|
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
|
||||||
|
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
|
||||||
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
|
||||||
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
|
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js',
|
||||||
|
'5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
|
||||||
|
'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
|
||||||
|
'-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -23,10 +23,11 @@ from .compat import (
|
||||||
|
|
||||||
def _js_bit_op(op):
|
def _js_bit_op(op):
|
||||||
|
|
||||||
|
def zeroise(x):
|
||||||
|
return 0 if x in (None, JS_Undefined) else x
|
||||||
|
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
def zeroise(x):
|
return op(zeroise(a), zeroise(b)) & 0xffffffff
|
||||||
return 0 if x in (None, JS_Undefined) else x
|
|
||||||
return op(zeroise(a), zeroise(b))
|
|
||||||
|
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
@ -44,7 +45,7 @@ def _js_arith_op(op):
|
||||||
def _js_div(a, b):
|
def _js_div(a, b):
|
||||||
if JS_Undefined in (a, b) or not (a and b):
|
if JS_Undefined in (a, b) or not (a and b):
|
||||||
return float('nan')
|
return float('nan')
|
||||||
return float('inf') if not b else operator.truediv(a or 0, b)
|
return operator.truediv(a or 0, b) if b else float('inf')
|
||||||
|
|
||||||
|
|
||||||
def _js_mod(a, b):
|
def _js_mod(a, b):
|
||||||
|
@ -260,13 +261,14 @@ class JSInterpreter(object):
|
||||||
counters[_MATCHING_PARENS[char]] += 1
|
counters[_MATCHING_PARENS[char]] += 1
|
||||||
elif char in counters:
|
elif char in counters:
|
||||||
counters[char] -= 1
|
counters[char] -= 1
|
||||||
if not escaping and char in _QUOTES and in_quote in (char, None):
|
if not escaping:
|
||||||
if in_quote or after_op or char != '/':
|
if char in _QUOTES and in_quote in (char, None):
|
||||||
in_quote = None if in_quote and not in_regex_char_group else char
|
if in_quote or after_op or char != '/':
|
||||||
elif in_quote == '/' and char in '[]':
|
in_quote = None if in_quote and not in_regex_char_group else char
|
||||||
in_regex_char_group = char == '['
|
elif in_quote == '/' and char in '[]':
|
||||||
|
in_regex_char_group = char == '['
|
||||||
escaping = not escaping and in_quote and char == '\\'
|
escaping = not escaping and in_quote and char == '\\'
|
||||||
after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op)
|
after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
|
||||||
|
|
||||||
if char != delim[pos] or any(counters.values()) or in_quote:
|
if char != delim[pos] or any(counters.values()) or in_quote:
|
||||||
pos = skipping = 0
|
pos = skipping = 0
|
||||||
|
@ -590,6 +592,8 @@ class JSInterpreter(object):
|
||||||
|
|
||||||
elif expr == 'undefined':
|
elif expr == 'undefined':
|
||||||
return JS_Undefined, should_return
|
return JS_Undefined, should_return
|
||||||
|
elif expr == 'NaN':
|
||||||
|
return float('NaN'), should_return
|
||||||
|
|
||||||
elif md.get('return'):
|
elif md.get('return'):
|
||||||
return local_vars[m.group('name')], should_return
|
return local_vars[m.group('name')], should_return
|
||||||
|
@ -635,7 +639,8 @@ class JSInterpreter(object):
|
||||||
def assertion(cndn, msg):
|
def assertion(cndn, msg):
|
||||||
""" assert, but without risk of getting optimized out """
|
""" assert, but without risk of getting optimized out """
|
||||||
if not cndn:
|
if not cndn:
|
||||||
raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr)
|
memb = member
|
||||||
|
raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
def eval_method():
|
def eval_method():
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
|
@ -737,6 +742,13 @@ class JSInterpreter(object):
|
||||||
return obj.index(idx, start)
|
return obj.index(idx, start)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return -1
|
return -1
|
||||||
|
elif member == 'charCodeAt':
|
||||||
|
assertion(isinstance(obj, compat_str), 'must be applied on a string')
|
||||||
|
# assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
|
||||||
|
idx = argvals[0] if isinstance(argvals[0], int) else 0
|
||||||
|
if idx >= len(obj):
|
||||||
|
return None
|
||||||
|
return ord(obj[idx])
|
||||||
|
|
||||||
idx = int(member) if isinstance(obj, list) else member
|
idx = int(member) if isinstance(obj, list) else member
|
||||||
return obj[idx](argvals, allow_recursion=allow_recursion)
|
return obj[idx](argvals, allow_recursion=allow_recursion)
|
||||||
|
@ -820,12 +832,10 @@ class JSInterpreter(object):
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
break
|
break
|
||||||
start, body_start = mobj.span()
|
start, body_start = mobj.span()
|
||||||
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
|
||||||
name = self._named_object(
|
name = self._named_object(local_vars, self.extract_function_from_code(
|
||||||
local_vars,
|
[x.strip() for x in mobj.group('args').split(',')],
|
||||||
self.extract_function_from_code(
|
body, local_vars, *global_stack))
|
||||||
self.build_arglist(mobj.group('args')),
|
|
||||||
body, local_vars, *global_stack))
|
|
||||||
code = code[:start] + name + remaining
|
code = code[:start] + name + remaining
|
||||||
return self.build_function(argnames, code, local_vars, *global_stack)
|
return self.build_function(argnames, code, local_vars, *global_stack)
|
||||||
|
|
||||||
|
@ -854,7 +864,7 @@ class JSInterpreter(object):
|
||||||
zip_longest(argnames, args, fillvalue=None))
|
zip_longest(argnames, args, fillvalue=None))
|
||||||
global_stack[0].update(kwargs)
|
global_stack[0].update(kwargs)
|
||||||
var_stack = LocalNameSpace(*global_stack)
|
var_stack = LocalNameSpace(*global_stack)
|
||||||
ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1)
|
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||||
if should_abort:
|
if should_abort:
|
||||||
return ret
|
return ret
|
||||||
return resf
|
return resf
|
||||||
|
|
Loading…
Reference in a new issue