From 6b54fcef206d754154f62e5fdacfcea056d1d9dc Mon Sep 17 00:00:00 2001
From: mbunse <5903796+mbunse@users.noreply.github.com>
Date: Sat, 21 Jan 2023 01:10:22 +0100
Subject: [PATCH] [VideoCdn] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/videocdn.py   | 86 ++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 youtube_dl/extractor/videocdn.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 947cbe8fd..e0ddf9d99 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -626,6 +626,7 @@ from .livestream import (
 )
 from .lnkgo import LnkGoIE
 from .localnews8 import LocalNews8IE
+from .videocdn import VideoCdnIE
 from .lovehomeporn import LoveHomePornIE
 from .lrt import LRTIE
 from .lynda import (
diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py
new file mode 100644
index 000000000..7dd84d063
--- /dev/null
+++ b/youtube_dl/extractor/videocdn.py
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+)
+
+from ..utils import determine_ext
+
+
+class VideoCdnIE(InfoExtractor):
+    """Extractor for player pages served from e.video-cdn.net."""
+
+    # ``_match_id`` reads the named group ``id``; the ``?`` that starts the
+    # query string is escaped so that it is matched literally.
+    _VALID_URL = r'https?://e\.video-cdn\.net/video\?(?:[^#]*&)?video-id=(?P<id>[a-zA-Z0-9_-]+)'
+    _TESTS = [
+        {
+            'url': 'https://e.video-cdn.net/video?video-id=8eBUrWaMJFS38A5X-j2CgY&player-id=53Tun3ZZpZpVuvaTvsm3jU',
+            'info_dict': {
+                'id': '8eBUrWaMJFS38A5X-j2CgY',
+                'ext': 'mp4',
+                'title': 'RiskBuster FireFighter VI - Adventskranz',
+                'thumbnail': r're:(?i)https://.*\.jpeg',
+            },
+        },
+        {
+            'url': 'https://e.video-cdn.net/video?video-id=91imQ_wKjkTFghe-3mmBAA&player-id=7nCLZ_ESM8rT9YUw6qUGA9',
+            'info_dict': {
+                'id': '91imQ_wKjkTFghe-3mmBAA',
+                'ext': 'mp4',
+                'title': 'SCC2019_Talk_Tychsen_TXL.mp4',
+                'thumbnail': r're:(?i)https://.*\.jpeg',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for a single player page.
+
+        The page is downloaded once; the thumbnail, title and manifest URL
+        are read from JSON-style "key":"value" pairs found in it.
+        """
+        url_video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, url_video_id)
+
+        # Prefer the id found on the player markup and fall back to the id
+        # from the URL, so a markup change does not abort the extraction.
+        # NOTE(review): the leading ``<[^>`` of this pattern was restored
+        # from a damaged copy -- confirm it against the live player markup.
+        video_id = self._search_regex(
+            r'(?ims)<[^>]+mi24-video-player+.*video-id=[\"\'](?P<videoid>[A-Za-z0-9_-]+)',
+            webpage, 'video id', default=url_video_id, group='videoid')
+
+        # The thumbnail is optional metadata, so a miss is not fatal.
+        thumbnail = self._search_regex(
+            r'\"thumbnailUrl\":\"(?P<thumbnail>[^\"]+)',
+            webpage, 'thumbnail', fatal=False, group='thumbnail')
+
+        title = self._search_regex(
+            r'\"name\":\"(?P<title>[^\"]+)',
+            webpage, 'title', group='title')
+
+        manifest_url = self._search_regex(
+            r'\"contentUrl\":\"(?P<manifesturl>[^\"]+)',
+            webpage, 'manifest_url', group='manifesturl')
+
+        formats = []
+        if isinstance(manifest_url, compat_str) and determine_ext(manifest_url) == 'm3u8':
+            formats.extend(self._extract_m3u8_formats(
+                compat_urlparse.urljoin(url, manifest_url),
+                video_id, 'mp4',
+                entry_protocol='m3u8_native', m3u8_id='m3u8'))
+        # youtube-dl's ``_sort_formats`` orders the list and rejects an empty
+        # one, so a page without an m3u8 manifest fails with a clear error.
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }