diff --git a/youtube_dl/extractor/kankids.py b/youtube_dl/extractor/kankids.py
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -4,8 +4,15 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 import re
 
+# URL pieces shared by _VALID_URL and by the episode links built from the series page.
+CONTENT_DIR = r'/content/kids/'
+DOMAIN = r'kankids.org.il'
+
 class KanKidsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?kankids\.org\.il/content/kids/(?P[a-z]+)-main/p-(?P[0-9]+)/(?P\w+)?/?$'
+    # re.escape() keeps the dots in DOMAIN literal without a hand-written '\.' escape.
+    _VALID_URL = (
+        r'https?://(?:www\.)?' + re.escape(DOMAIN) + CONTENT_DIR
+        + r'(?P[a-z]+)-main/p-(?P[0-9]+)/(?P\w+)?/?$')
     _TEST = {
         'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/',
         'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
@@ -32,28 +39,39 @@ class KanKidsIE(InfoExtractor):
 
         series_title = self._html_search_regex(r'(?P<title>.+) \|', webpage, 'title')
 
-        season = playlist_season if playlist_season else '(?P<season>\w+)'
-        playlist = set(re.findall(
-            r'href="/content/kids/' +   # Content dir
-            category + r'-main/' +      # Category
-            'p-' + series_id + '/' +    # Series
-            season + '/' +              # Season
-            '(?P<id>[0-9]+)/"' +        # Episode
-            '.+title="(?P<title>.+)"'   # Title
-            , webpage))
-        # , 'Episode list')
-        print('playlist:', playlist)
+        # A season pinned by the URL is matched literally; otherwise capture it per episode.
+        season = playlist_season if playlist_season else r'(?P<season>\w+)'
+        series_dir = CONTENT_DIR + category + r'-main/p-' + series_id + r'/'
+        episode_re = (
+            r'href="' + series_dir        # Series page directory
+            + season + r'/'               # Season
+            + r'(?P<id>[0-9]+)/"'         # Episode
+            + r'.+title="(?P<title>.+)"'  # Title
+        )
+
+        # De-duplicate in page order; a set() would yield the episodes in arbitrary order.
+        playlist = []
+        for episode in re.findall(episode_re, webpage):
+            if playlist_season:
+                # The pinned season is not a capture group, so put it back in front.
+                episode = (playlist_season,) + episode
+            if episode not in playlist:
+                playlist.append(episode)
 
-        for season, video_id, title in playlist if not playlist_season else map(lambda episode: (playlist_season,) + episode, playlist):
-            pass
+        entries = []
+        # Episode pages live under the series directory, the same path the hrefs matched.
+        series_url = r'https://www.' + DOMAIN + series_dir
+        for season, video_id, title in playlist:
+            entries.append(self.url_result(
+                series_url + season + r'/' + video_id + r'/',
+                ie='Generic',
+                video_id=video_id,
+                video_title=title,
+            ))
 
         return {
+            '_type': 'playlist',
             'id': series_id,
-            'title': title,
-            'description': self._og_search_description(webpage),
-            'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/s1/89707/',
-            'ie_key': 'Generic',
-            '_type': 'url',
-            # 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-            # TODO more properties (see youtube_dl/extractor/common.py)
+            'title': series_title,
+            'entries': entries,
         }