mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-22 02:01:50 +00:00
[utils] Improve js_to_json, align with yt-dlp
* support variable substitution, from https://github.com/yt-dlp/yt-dlp/pull/#521 etc, thanks ChillingPepper, Grub4k, pukkandan
* improve escape handling, from https://github.com/yt-dlp/yt-dlp/pull/#521, thanks Grub4k
* support template strings, from https://github.com/yt-dlp/yt-dlp/pull/6623, thanks Grub4k
* add limited `!` evaluation (e.g., !!0 -> false, see tests)
This commit is contained in:
parent
825a40744b
commit
d9d07a9581
2 changed files with 186 additions and 29 deletions
|
@ -905,6 +905,85 @@ class TestUtil(unittest.TestCase):
|
||||||
)
|
)
|
||||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||||
|
|
||||||
|
def test_js_to_json_vars_strings(self):
|
||||||
|
self.assertDictEqual(
|
||||||
|
json.loads(js_to_json(
|
||||||
|
'''{
|
||||||
|
'null': a,
|
||||||
|
'nullStr': b,
|
||||||
|
'true': c,
|
||||||
|
'trueStr': d,
|
||||||
|
'false': e,
|
||||||
|
'falseStr': f,
|
||||||
|
'unresolvedVar': g,
|
||||||
|
}''',
|
||||||
|
{
|
||||||
|
'a': 'null',
|
||||||
|
'b': '"null"',
|
||||||
|
'c': 'true',
|
||||||
|
'd': '"true"',
|
||||||
|
'e': 'false',
|
||||||
|
'f': '"false"',
|
||||||
|
'g': 'var',
|
||||||
|
}
|
||||||
|
)),
|
||||||
|
{
|
||||||
|
'null': None,
|
||||||
|
'nullStr': 'null',
|
||||||
|
'true': True,
|
||||||
|
'trueStr': 'true',
|
||||||
|
'false': False,
|
||||||
|
'falseStr': 'false',
|
||||||
|
'unresolvedVar': 'var'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertDictEqual(
|
||||||
|
json.loads(js_to_json(
|
||||||
|
'''{
|
||||||
|
'int': a,
|
||||||
|
'intStr': b,
|
||||||
|
'float': c,
|
||||||
|
'floatStr': d,
|
||||||
|
}''',
|
||||||
|
{
|
||||||
|
'a': '123',
|
||||||
|
'b': '"123"',
|
||||||
|
'c': '1.23',
|
||||||
|
'd': '"1.23"',
|
||||||
|
}
|
||||||
|
)),
|
||||||
|
{
|
||||||
|
'int': 123,
|
||||||
|
'intStr': '123',
|
||||||
|
'float': 1.23,
|
||||||
|
'floatStr': '1.23',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertDictEqual(
|
||||||
|
json.loads(js_to_json(
|
||||||
|
'''{
|
||||||
|
'object': a,
|
||||||
|
'objectStr': b,
|
||||||
|
'array': c,
|
||||||
|
'arrayStr': d,
|
||||||
|
}''',
|
||||||
|
{
|
||||||
|
'a': '{}',
|
||||||
|
'b': '"{}"',
|
||||||
|
'c': '[]',
|
||||||
|
'd': '"[]"',
|
||||||
|
}
|
||||||
|
)),
|
||||||
|
{
|
||||||
|
'object': {},
|
||||||
|
'objectStr': '{}',
|
||||||
|
'array': [],
|
||||||
|
'arrayStr': '[]',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
def test_js_to_json_realworld(self):
|
def test_js_to_json_realworld(self):
|
||||||
inp = '''{
|
inp = '''{
|
||||||
'clip':{'provider':'pseudo'}
|
'clip':{'provider':'pseudo'}
|
||||||
|
@ -975,10 +1054,10 @@ class TestUtil(unittest.TestCase):
|
||||||
!42: 42
|
!42: 42
|
||||||
}''')
|
}''')
|
||||||
self.assertEqual(json.loads(on), {
|
self.assertEqual(json.loads(on), {
|
||||||
'a': 0,
|
'a': True,
|
||||||
'b': 1,
|
'b': False,
|
||||||
'c': 0,
|
'c': False,
|
||||||
'd': 42.42,
|
'd': True,
|
||||||
'e': [],
|
'e': [],
|
||||||
'f': "abc",
|
'f': "abc",
|
||||||
'g': "",
|
'g': "",
|
||||||
|
@ -1048,10 +1127,26 @@ class TestUtil(unittest.TestCase):
|
||||||
on = js_to_json('{ "040": "040" }')
|
on = js_to_json('{ "040": "040" }')
|
||||||
self.assertEqual(json.loads(on), {'040': '040'})
|
self.assertEqual(json.loads(on), {'040': '040'})
|
||||||
|
|
||||||
|
on = js_to_json('[1,//{},\n2]')
|
||||||
|
self.assertEqual(json.loads(on), [1, 2])
|
||||||
|
|
||||||
|
on = js_to_json(r'"\^\$\#"')
|
||||||
|
self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
|
||||||
|
|
||||||
|
on = js_to_json('\'"\\""\'')
|
||||||
|
self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
|
||||||
|
|
||||||
def test_js_to_json_malformed(self):
|
def test_js_to_json_malformed(self):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
||||||
|
def test_js_to_json_template_literal(self):
|
||||||
|
self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
|
||||||
|
self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
|
||||||
|
self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
|
||||||
|
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
|
||||||
|
self.assertEqual(js_to_json('`${name}`', {}), '"name"')
|
||||||
|
|
||||||
def test_extract_attributes(self):
|
def test_extract_attributes(self):
|
||||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||||
|
|
|
@ -4365,46 +4365,108 @@ def strip_jsonp(code):
|
||||||
r'\g<callback_data>', code)
|
r'\g<callback_data>', code)
|
||||||
|
|
||||||
|
|
||||||
def js_to_json(code):
|
def js_to_json(code, *args, **kwargs):
|
||||||
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
|
|
||||||
|
# vars is a dict of (var, val) pairs to substitute
|
||||||
|
vars = args[0] if len(args) > 0 else kwargs.get('vars', {})
|
||||||
|
strict = kwargs.get('strict', False)
|
||||||
|
|
||||||
|
STRING_QUOTES = '\'"`'
|
||||||
|
STRING_RE = '|'.join(r'{0}(?:\\.|[^\\{0}])*{0}'.format(q) for q in STRING_QUOTES)
|
||||||
|
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
|
||||||
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
|
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
|
||||||
INTEGER_TABLE = (
|
INTEGER_TABLE = (
|
||||||
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
|
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
|
||||||
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
|
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
|
||||||
|
(r'(?s)^(\d+){skip}:?$'.format(skip=SKIP_RE), 10),
|
||||||
)
|
)
|
||||||
|
# compat candidate
|
||||||
|
JSONDecodeError = json.JSONDecodeError if 'JSONDecodeError' in dir(json) else ValueError
|
||||||
|
|
||||||
|
def process_escape(match):
|
||||||
|
JSON_PASSTHROUGH_ESCAPES = r'"\bfnrtu'
|
||||||
|
escape = match.group(1) or match.group(2)
|
||||||
|
|
||||||
|
return ('\\' + escape if escape in JSON_PASSTHROUGH_ESCAPES
|
||||||
|
else '\\u00' if escape == 'x'
|
||||||
|
else '' if escape == '\n'
|
||||||
|
else escape)
|
||||||
|
|
||||||
|
def template_substitute(match):
|
||||||
|
evaluated = js_to_json(match.group(1), vars, strict=strict)
|
||||||
|
if evaluated[0] == '"':
|
||||||
|
return json.loads(evaluated)
|
||||||
|
return evaluated
|
||||||
|
|
||||||
def fix_kv(m):
|
def fix_kv(m):
|
||||||
v = m.group(0)
|
v = m.group(0)
|
||||||
if v in ('true', 'false', 'null'):
|
if v in ('true', 'false', 'null'):
|
||||||
return v
|
return v
|
||||||
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
|
elif v in ('undefined', 'void 0'):
|
||||||
return ""
|
return 'null'
|
||||||
|
elif v.startswith('/*') or v.startswith('//') or v == ',':
|
||||||
|
return ''
|
||||||
|
|
||||||
if v[0] in ("'", '"'):
|
if v[0] in STRING_QUOTES:
|
||||||
v = re.sub(r'(?s)\\.|"', lambda m: {
|
v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
|
||||||
'"': '\\"',
|
escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
|
||||||
"\\'": "'",
|
return '"{0}"'.format(escaped)
|
||||||
'\\\n': '',
|
|
||||||
'\\x': '\\u00',
|
inv = IDENTITY
|
||||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
im = re.split(r'^!+', v)
|
||||||
|
if len(im) > 1 and not im[-1].endswith(':'):
|
||||||
|
if (len(v) - len(im[1])) % 2 == 1:
|
||||||
|
inv = lambda x: 'true' if x == 0 else 'false'
|
||||||
else:
|
else:
|
||||||
|
inv = lambda x: 'false' if x == 0 else 'true'
|
||||||
|
if not any(x for x in im):
|
||||||
|
return
|
||||||
|
v = im[-1]
|
||||||
|
|
||||||
for regex, base in INTEGER_TABLE:
|
for regex, base in INTEGER_TABLE:
|
||||||
im = re.match(regex, v)
|
im = re.match(regex, v)
|
||||||
if im:
|
if im:
|
||||||
i = int(im.group(1), base)
|
i = int(im.group(1), base)
|
||||||
return '"%d":' % i if v.endswith(':') else '%d' % i
|
return ('"%s":' if v.endswith(':') else '%s') % inv(i)
|
||||||
|
|
||||||
return '"%s"' % v
|
if v in vars:
|
||||||
|
try:
|
||||||
|
if not strict:
|
||||||
|
json.loads(vars[v])
|
||||||
|
except JSONDecodeError:
|
||||||
|
return inv(json.dumps(vars[v]))
|
||||||
|
else:
|
||||||
|
return inv(vars[v])
|
||||||
|
|
||||||
|
if not strict:
|
||||||
|
v = try_call(inv, args=(v,), default=v)
|
||||||
|
if v in ('true', 'false'):
|
||||||
|
return v
|
||||||
|
return '"{0}"'.format(v)
|
||||||
|
|
||||||
|
raise ValueError('Unknown value: ' + v)
|
||||||
|
|
||||||
|
def create_map(mobj):
|
||||||
|
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
|
||||||
|
|
||||||
|
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
|
||||||
|
if not strict:
|
||||||
|
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
|
||||||
|
code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
|
||||||
|
code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
|
||||||
|
code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
|
||||||
|
|
||||||
return re.sub(r'''(?sx)
|
return re.sub(r'''(?sx)
|
||||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
{str_}|
|
||||||
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
|
{comment}|
|
||||||
{comment}|,(?={skip}[\]}}])|
|
,(?={skip}[\]}}])|
|
||||||
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
void\s0|
|
||||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
!*(?:(?<!\d)[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
|
||||||
[0-9]+(?={skip}:)|
|
(?:\b|!+)0(?:[xX][\da-fA-F]+|[0-7]+)(?:{skip}:)?|
|
||||||
|
!+\d+(?:\.\d*)?(?:{skip}:)?|
|
||||||
|
[0-9]+(?:{skip}:)|
|
||||||
!+
|
!+
|
||||||
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
'''.format(comment=COMMENT_RE, skip=SKIP_RE, str_=STRING_RE), fix_kv, code)
|
||||||
|
|
||||||
|
|
||||||
def qualities(quality_ids):
|
def qualities(quality_ids):
|
||||||
|
|
Loading…
Reference in a new issue