From 84f214d840bfdb1b3342dbf6a07d94a25787660f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 5 Jul 2016 17:11:45 +0100 Subject: [PATCH] [prosiebensat1] extract all formats --- youtube_dl/extractor/prosiebensat1.py | 165 ++++++++++++++------------ 1 file changed, 88 insertions(+), 77 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 07d49d489..ad0f0031c 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,7 +5,7 @@ import re from hashlib import sha1 from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode +from ..compat import compat_str from ..utils import ( ExtractorError, determine_ext, @@ -71,6 +71,7 @@ class ProSiebenSat1IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'skip': 'This video is unavailable', }, { 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', @@ -86,6 +87,7 @@ class ProSiebenSat1IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'skip': 'This video is unavailable', }, { 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', @@ -101,6 +103,7 @@ class ProSiebenSat1IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'skip': 'This video is unavailable', }, { 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', @@ -116,6 +119,7 @@ class ProSiebenSat1IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'skip': 'This video is unavailable', }, { 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', @@ -131,6 +135,7 @@ class ProSiebenSat1IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'skip': 'This video is unavailable', }, { 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', @@ -227,70 +232,42 @@ class ProSiebenSat1IE(InfoExtractor): ] def _extract_clip(self, url, webpage): - clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') + clip_id = self._html_search_regex( + self._CLIPID_REGEXES, webpage, 'clip id') access_token = 'prosieben' client_name = 'kolibri-2.0.19-splec4' client_location = url - videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({ - 'access_token': access_token, - 'client_location': client_location, - 'client_name': client_name, - 'ids': clip_id, - }) - - video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0] + video = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos', + clip_id, 'Downloading videos JSON', query={ + 'access_token': access_token, + 'client_location': client_location, + 'client_name': client_name, + 'ids': clip_id, + })[0] if video.get('is_protected') is True: raise ExtractorError('This video is DRM protected.', expected=True) duration = float_or_none(video.get('duration')) - source_ids = [source['id'] for source in video['sources']] - source_ids_str = ','.join(map(str, source_ids)) + source_ids = [compat_str(source['id']) for source in video['sources']] g = '01!8d8F_)r9]4s[qeuXfP%' + client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest() - client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]) - .encode('utf-8')).hexdigest() - - sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - })) - - sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON') + sources = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + clip_id, 'Downloading sources JSON', query={ + 'access_token': access_token, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': client_name, + }) server_id = sources['server_id'] - client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, - client_location, source_ids_str, g, client_name]) - .encode('utf-8')).hexdigest() - - url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - 'server_id': server_id, - 'source_ids': source_ids_str, - })) - - urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON') - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') - description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - - upload_date = unified_strdate(self._html_search_regex( - self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) - - formats = [] - - urls_sources = urls['sources'] - if isinstance(urls_sources, dict): - urls_sources = urls_sources.values() def fix_bitrate(bitrate): bitrate = int_or_none(bitrate) @@ -298,37 +275,71 @@ class ProSiebenSat1IE(InfoExtractor): return None return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate - for source in urls_sources: - protocol = source['protocol'] - source_url = source['url'] - if protocol == 'rtmp' or protocol == 'rtmpe': - mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) - if not mobj: + formats = [] + for source_id in source_ids: + client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest() + urls = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + clip_id, 'Downloading urls JSON', query={ + 'access_token': access_token, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': client_name, + 'server_id': server_id, + 'source_ids': source_id, + }) + if urls.get('status_code') != 0: + raise ExtractorError('This video is unavailable', expected=True) + urls_sources = urls['sources'] + if isinstance(urls_sources, dict): + urls_sources = urls_sources.values() + for source in urls_sources: + source_url = source.get('url') + if not source_url: continue - path = mobj.group('path') - mp4colon_index = path.rfind('mp4:') - app = path[:mp4colon_index] - play_path = path[mp4colon_index:] - formats.append({ - 'url': '%s/%s' % (mobj.group('url'), app), - 'app': app, - 'play_path': play_path, - 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', - 'page_url': 'http://www.prosieben.de', - 'vbr': fix_bitrate(source['bitrate']), - 'ext': 'mp4', - 'format_id': '%s_%s' % (source['cdn'], source['bitrate']), - }) - elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': - formats.extend(self._extract_f4m_formats(source_url, clip_id)) - else: - formats.append({ - 'url': source_url, - 'vbr': fix_bitrate(source['bitrate']), - }) - + protocol = source.get('protocol') + mimetype = source.get('mimetype') + if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, clip_id, f4m_id='hds', fatal=False)) + elif mimetype == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + tbr = fix_bitrate(source['bitrate']) + if protocol in ('rtmp', 'rtmpe'): + mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) + if not mobj: + continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] + formats.append({ + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, + 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', + 'page_url': 'http://www.prosieben.de', + 'tbr': tbr, + 'ext': 'flv', + 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + }) + else: + formats.append({ + 'url': source_url, + 'tbr': tbr, + 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + }) self._sort_formats(formats) + description = self._html_search_regex( + self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) + upload_date = unified_strdate(self._html_search_regex( + self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) + return { 'id': clip_id, 'title': title,