diff --git a/test/test_utils.py b/test/test_utils.py
index 870032d6c..3928173c8 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -102,6 +102,7 @@ from youtube_dl.utils import (
     url_or_none,
     urljoin,
     urlencode_postdata,
+    urlhandle_detect_ext,
     urshift,
     update_url_query,
     variadic,
@@ -2039,6 +2040,43 @@ Line 1
             'a', 'b', 'c', 'd',
             from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d')
 
+    def test_urlhandle_detect_ext(self):
+
+        # minimal stand-in for a response object: only the headers mapping is read
+        class UrlHandle(object):
+            _info = {}
+
+            def __init__(self, info):
+                self._info = info
+
+            @property
+            def headers(self):
+                return self._info
+
+        # header with non-ASCII character and contradictory Content-Type
+        urlh = UrlHandle({
+            'Content-Disposition': b'attachment; filename="Epis\xf3dio contains non-ASCI ISO 8859-1 character.mp3"',
+            'Content-Type': b'audio/aac',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
+        # header with no Content-Disposition
+        urlh = UrlHandle({
+            'Content-Type': b'audio/mp3',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
+        # no Content-Disposition: extension taken from x-amz-meta-name
+        urlh = UrlHandle({
+            'x-amz-meta-name': b'track.flac',
+            'Content-Type': b'audio/aac',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'flac')
+        # x-amz-meta-name without a dot has no extension: use Content-Type
+        urlh = UrlHandle({
+            'x-amz-meta-name': b'track',
+            'Content-Type': b'audio/mp3',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 970bc591a..f35c71601 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4743,7 +4743,9 @@ def parse_codecs(codecs_str):
 
 
 def urlhandle_detect_ext(url_handle):
-    getheader = url_handle.headers.get
+    # header values may be bytes holding ISO 8859-1 text: decode before matching
+    getheader = lambda x: _decode_compat_str(
+        url_handle.headers.get(x), encoding='iso-8859-1', errors='ignore', or_none=True)
 
     cd = getheader('Content-Disposition')
     if cd:
@@ -4753,6 +4755,13 @@ def urlhandle_detect_ext(url_handle):
             if e:
                 return e
 
+    # from yt-dlp/2647c933b8: thx bashonly
+    # a name without a dot has no extension: rpartition() then puts the
+    # whole name in the last slot, so require the separator as well
+    _, dot, meta_ext = (getheader('x-amz-meta-name') or '').rpartition('.')
+    if dot and meta_ext:
+        return meta_ext
+
     return mimetype2ext(getheader('Content-Type'))
 
 