mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-06-02 10:29:28 +00:00
[utils] Detect extension from any RFC 6266 Content-Disposition syntax
Add support for the unquoted token form and the RFC 5987 extended parameter syntax (filename*=)
This commit is contained in:
parent
7990d1e630
commit
973f76cf7b
|
@ -2069,6 +2069,33 @@ Line 1
|
||||||
'Content-Type': b'audio/mp3',
|
'Content-Type': b'audio/mp3',
|
||||||
})
|
})
|
||||||
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||||
|
# header with Content-Disposition and unquoted filename
|
||||||
|
urlh = UrlHandle({
|
||||||
|
'Content-Disposition': b'attachment; filename=unquoted_filename_token.mp3',
|
||||||
|
})
|
||||||
|
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||||
|
# header with Content-Disposition including spacing and uppercase
|
||||||
|
urlh = UrlHandle({
|
||||||
|
'Content-Disposition': b'ATTACHMENT; FileName = unquoted_filename_token.mp3',
|
||||||
|
})
|
||||||
|
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||||
|
# header with Content-Disposition and extended filename parameter syntax
|
||||||
|
urlh = UrlHandle({
|
||||||
|
'Content-Disposition': b"attachment; filename*=iso8859-15''costs%201%A4%20filename.mp3",
|
||||||
|
})
|
||||||
|
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||||
|
# header with Content-Disposition and both filename parameter syntaxes
|
||||||
|
urlh = UrlHandle({
|
||||||
|
'Content-Disposition': b'''attachment; filename="should ignore.mp4";
|
||||||
|
FileName* = iso8859-15''costs%201%A4%20filename.mp3''',
|
||||||
|
})
|
||||||
|
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||||
|
# header with Content-Disposition and 'wrong' order of both syntaxes
|
||||||
|
urlh = UrlHandle({
|
||||||
|
'Content-Disposition': b'''attachment; filename*=iso8859-15''costs%201%A4%20filename.mp3;
|
||||||
|
filename="should ignore.mp4"''',
|
||||||
|
})
|
||||||
|
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -66,9 +66,10 @@ from .compat import (
|
||||||
compat_urllib_HTTPError,
|
compat_urllib_HTTPError,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_parse_qs as compat_parse_qs,
|
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_parse_unquote_plus,
|
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_xpath,
|
compat_xpath,
|
||||||
)
|
)
|
||||||
|
@ -4755,9 +4756,22 @@ def urlhandle_detect_ext(url_handle):
|
||||||
|
|
||||||
cd = getheader('Content-Disposition')
|
cd = getheader('Content-Disposition')
|
||||||
if cd:
|
if cd:
|
||||||
m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
|
m = re.match(r'''(?xi)
|
||||||
|
attachment;\s*
|
||||||
|
(?:filename\s*=[^;]+?;\s*)? # possible initial filename=...;, ignored
|
||||||
|
filename(?P<x>\*)?\s*=\s* # filename/filename* =
|
||||||
|
(?(x)(?P<charset>\S+?)'[\w-]*'|(?P<q>")?) # if * then charset'...' else maybe "
|
||||||
|
(?P<filename>(?(q)[^"]+(?=")|[^\s;]+)) # actual name of file
|
||||||
|
''', cd)
|
||||||
if m:
|
if m:
|
||||||
e = determine_ext(m.group('filename'), default_ext=None)
|
m = m.groupdict()
|
||||||
|
filename = m.get('filename')
|
||||||
|
if m.get('x'):
|
||||||
|
try:
|
||||||
|
filename = compat_urllib_parse_unquote(filename, encoding=m.get('charset', 'utf-8'))
|
||||||
|
except LookupError: # unrecognised character set name
|
||||||
|
pass
|
||||||
|
e = determine_ext(filename, default_ext=None)
|
||||||
if e:
|
if e:
|
||||||
return e
|
return e
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue