1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-22 10:11:52 +00:00

[utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when `--restrict-filenames` is set

Implements https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which has a test.
This commit is contained in:
dirkf 2022-10-11 12:18:12 +00:00 committed by GitHub
parent 6e2626f092
commit c94a459a24
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -33,6 +33,7 @@ import sys
import tempfile import tempfile
import time import time
import traceback import traceback
import unicodedata
import xml.etree.ElementTree import xml.etree.ElementTree
import zlib import zlib
@@ -2118,6 +2119,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
return '_' return '_'
return char return char
# Replace look-alike Unicode glyphs
if restricted and not is_id:
s = unicodedata.normalize('NFKC', s)
# Handle timestamps # Handle timestamps
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
result = ''.join(map(replace_insane, s)) result = ''.join(map(replace_insane, s))