1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-25 11:41:52 +00:00

[utils] Escape all HTML entities written in hexadecimal form

This commit is contained in:
Naglis Jonaitis 2015-03-26 17:15:27 +02:00
parent fbfcc2972b
commit 91757b0f37
2 changed files with 3 additions and 1 deletion

View file

@ -200,6 +200,8 @@ class TestUtil(unittest.TestCase):
def test_unescape_html(self): def test_unescape_html(self):
self.assertEqual(unescapeHTML('%20;'), '%20;') self.assertEqual(unescapeHTML('%20;'), '%20;')
self.assertEqual(unescapeHTML('&#x2F;'), '/')
self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual( self.assertEqual(
unescapeHTML('&eacute;'), 'é') unescapeHTML('&eacute;'), 'é')

View file

@ -348,7 +348,7 @@ def _htmlentity_transform(entity):
if entity in compat_html_entities.name2codepoint: if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity]) return compat_chr(compat_html_entities.name2codepoint[entity])
mobj = re.match(r'#(x?[0-9]+)', entity) mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None: if mobj is not None:
numstr = mobj.group(1) numstr = mobj.group(1)
if numstr.startswith('x'): if numstr.startswith('x'):