From 70497994707b6bcbd6c0ecc3fb1073be4d75d970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 Mar 2017 23:16:51 +0700 Subject: [PATCH] [discoverygo:playlist] Add extractor (closes #12424) --- youtube_dl/extractor/discoverygo.py | 59 +++++++++++++++++++++++++++-- youtube_dl/extractor/extractors.py | 5 ++- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index 2042493a8..d14de8f00 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -1,17 +1,21 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( extract_attributes, + ExtractorError, int_or_none, parse_age_limit, - ExtractorError, + remove_end, + unescapeHTML, ) -class DiscoveryGoIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?: +class DiscoveryGoBaseIE(InfoExtractor): + _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: discovery| investigationdiscovery| discoverylife| @@ -21,7 +25,11 @@ class DiscoveryGoIE(InfoExtractor): sciencechannel| tlc| velocitychannel - )go\.com/(?:[^/]+/)*(?P[^/?#&]+)''' + )go\.com/%s(?P[^/?#&]+)''' + + +class DiscoveryGoIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' _TEST = { 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', 'info_dict': { @@ -113,3 +121,46 @@ class DiscoveryGoIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' + _TEST = { + 'url': 'https://www.discoverygo.com/bering-sea-gold/', + 'info_dict': { + 'id': 'bering-sea-gold', + 'title': 'Bering Sea Gold', + 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', + }, + 'playlist_mincount': 6, + } + + @classmethod + def suitable(cls, url): + return False if DiscoveryGoIE.suitable(url) else super( + DiscoveryGoPlaylistIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer(r'data-json=(["\'])(?P{.+?})\1', webpage): + data = self._parse_json( + mobj.group('json'), display_id, + transform_source=unescapeHTML, fatal=False) + if not isinstance(data, dict) or data.get('type') != 'episode': + continue + episode_url = data.get('socialUrl') + if not episode_url: + continue + entries.append(self.url_result( + episode_url, ie=DiscoveryGoIE.ie_key(), + video_id=data.get('id'))) + + return self.playlist_result( + entries, display_id, + remove_end(self._og_search_title( + webpage, fatal=False), ' | Discovery GO'), + self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bd2762e47..24c478932 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -265,7 +265,10 @@ from .dvtv import DVTVIE from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE -from .discoverygo import DiscoveryGoIE +from .discoverygo import ( + DiscoveryGoIE, + DiscoveryGoPlaylistIE, +) from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE