mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-05-19 11:39:28 +00:00
[utils] Detect extension from any RFC-defined Content-Disposition filename syntax
Add support for the unquoted token form of `filename` and the RFC 5987 extended parameter syntax (`filename*`)
This commit is contained in:
parent
7990d1e630
commit
973f76cf7b
|
@ -2069,6 +2069,33 @@ Line 1
|
|||
'Content-Type': b'audio/mp3',
|
||||
})
|
||||
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||
# header with Content-Disposition and unquoted filename
|
||||
urlh = UrlHandle({
|
||||
'Content-Disposition': b'attachment; filename=unquoted_filename_token.mp3',
|
||||
})
|
||||
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||
# header with Content-Disposition including spacing and uppercase
|
||||
urlh = UrlHandle({
|
||||
'Content-Disposition': b'ATTACHMENT; FileName = unquoted_filename_token.mp3',
|
||||
})
|
||||
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||
# header with Content-Disposition and extended filename parameter syntax
|
||||
urlh = UrlHandle({
|
||||
'Content-Disposition': b"attachment; filename*=iso8859-15''costs%201%A4%20filename.mp3",
|
||||
})
|
||||
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||
# header with Content-Disposition and both filename parameter syntaxes
|
||||
urlh = UrlHandle({
|
||||
'Content-Disposition': b'''attachment; filename="should ignore.mp4";
|
||||
FileName* = iso8859-15''costs%201%A4%20filename.mp3''',
|
||||
})
|
||||
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||
# header with Content-Disposition and 'wrong' order of both syntaxes
|
||||
urlh = UrlHandle({
|
||||
'Content-Disposition': b'''attachment; filename*=iso8859-15''costs%201%A4%20filename.mp3;
|
||||
filename="should ignore.mp4"''',
|
||||
})
|
||||
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -66,9 +66,10 @@ from .compat import (
|
|||
compat_urllib_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_request,
|
||||
compat_xpath,
|
||||
)
|
||||
|
@ -4755,9 +4756,22 @@ def urlhandle_detect_ext(url_handle):
|
|||
|
||||
cd = getheader('Content-Disposition')
|
||||
if cd:
|
||||
m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
|
||||
m = re.match(r'''(?xi)
|
||||
attachment;\s*
|
||||
(?:filename\s*=[^;]+?;\s*)? # possible initial filename=...;, ignored
|
||||
filename(?P<x>\*)?\s*=\s* # filename/filename* =
|
||||
(?(x)(?P<charset>\S+?)'[\w-]*'|(?P<q>")?) # if * then charset'...' else maybe "
|
||||
(?P<filename>(?(q)[^"]+(?=")|[^\s;]+)) # actual name of file
|
||||
''', cd)
|
||||
if m:
|
||||
e = determine_ext(m.group('filename'), default_ext=None)
|
||||
m = m.groupdict()
|
||||
filename = m.get('filename')
|
||||
if m.get('x'):
|
||||
try:
|
||||
filename = compat_urllib_parse_unquote(filename, encoding=m.get('charset', 'utf-8'))
|
||||
except LookupError: # unrecognised character set name
|
||||
pass
|
||||
e = determine_ext(filename, default_ext=None)
|
||||
if e:
|
||||
return e
|
||||
|
||||
|
|
Loading…
Reference in a new issue