Added a basic shell for a kankids extractor.

2024-11-28 13:11:49 +00:00 · 2024-06-24 00:10:22 +03:00 · 2024-06-24 00:10:22 +03:00 · 833fe8c9af
commit 833fe8c9af
parent 4d05f84325
2 changed files with 45 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -560,6 +560,7 @@ from .jwplatform import JWPlatformIE
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
 from .kankan import KankanIE
 from .kankids import KanKidsIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@ -0,0 +1,44 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class KanKidsIE(InfoExtractor):
    """Skeleton extractor for kankids.org.il kids' content pages.

    Work in progress: the title and the target URL returned by
    ``_real_extract`` are hard-coded placeholders, and the ``_TEST`` entry
    still carries template values.
    """

    # Named groups: ``category`` (the part before "-main"), ``id`` (digits
    # after "p-") and an optional single trailing path segment ``season``.
    _VALID_URL = r'https?://(?:www\.)?kankids\.org\.il/content/kids/(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+/)?$'

    # NOTE(review): 'md5', 'id' and 'title' below are still template
    # placeholders ('id' does not match the p-12050 in the URL); fill them in
    # once real extraction is implemented.
    _TEST = {
        'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/',
        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
        'info_dict': {
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
            'thumbnail': r're:^https?://.*\.jpg$',
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type (for example int or float)
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and return a ``url``-type result dict.

        The ``id`` comes from the ``id`` group of ``_VALID_URL`` and the
        description from the page's Open Graph metadata.  The title and the
        target URL are fixed placeholders for now; the result names the
        ``Generic`` extractor via ``ie_key``.
        """
        # The 'category' and 'season' groups of _VALID_URL are not used yet.
        # The former debug print of the match groups was dropped: it wrote to
        # stdout, and its zero-argument super() call is Python 3 only.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # TODO more code goes here, for example ...
        # title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
        title = 'hi'

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            # TODO: placeholder target; should be derived from the page.
            'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/s1/89707/',
            'ie_key': 'Generic',
            '_type': 'url',
            # 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
            # TODO more properties (see youtube_dl/extractor/common.py)
            }