From 3b3f2dd6e1ced36ade3b4cfab1df2ed33d6b8483 Mon Sep 17 00:00:00 2001 From: Thilo Billerbeck Date: Wed, 17 Aug 2022 21:06:59 +0200 Subject: [PATCH] [veejoy] applied suggested changes from pr --- youtube_dl/extractor/extractors.py | 4 +- youtube_dl/extractor/veejoy.py | 139 +++++++++++++++++++---------- 2 files changed, 94 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 13ae1e76d..0891fb1be 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1416,6 +1416,7 @@ from .ustudio import ( from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE +from .veejoy import VeejoyIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import ( @@ -1660,5 +1661,4 @@ from .zingmp3 import ( ZingMp3AlbumIE, ) from .zoom import ZoomIE -from .zype import ZypeIE -from .veejoy import VeejoyIE \ No newline at end of file +from .zype import ZypeIE \ No newline at end of file diff --git a/youtube_dl/extractor/veejoy.py b/youtube_dl/extractor/veejoy.py index f23dcd32f..1d89c66f0 100644 --- a/youtube_dl/extractor/veejoy.py +++ b/youtube_dl/extractor/veejoy.py @@ -1,8 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals -import json from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + determine_ext, + int_or_none, + parse_iso8601, + qualities, + strip_or_none, + try_get, + update_url_query, + url_or_none, +) class VeejoyIE(InfoExtractor): @@ -14,7 +25,9 @@ class VeejoyIE(InfoExtractor): 'ext': 'mp4', 'title': 'On-ride Tyrol Log Flume', 'description': 'Through the ‘magical world of diamonds’ and straight into the cool water. Experience a different kind of water slide with the ‘Tyrol Log Flume’. One of the oldest and most popular attractions in the park!', - 'uploader': 'MACK Media' + 'uploader': 'MACK Media', + 'upload_date': '20210923', + 'timestamp': 1632388920 } }, { 'url': 'https://www.veejoy.de/en/movies/off-to-rulantica', @@ -23,7 +36,9 @@ class VeejoyIE(InfoExtractor): 'ext': 'mp4', 'title': 'Off to Rulantica', 'description': 'Rocking through the water on round boats, creating splashy fun with water cannons – and then, sliding down ‘Svalgurok’ on ten different slides: Soaking wet water fun is calling.', - 'uploader': 'Veejoy' + 'uploader': 'Veejoy', + 'upload_date': '20220811', + 'timestamp': 1660206600 } }, { 'url': 'https://www.veejoy.de/de/series/o-the-construction-documentary/the-building-site-grows', @@ -32,83 +47,113 @@ class VeejoyIE(InfoExtractor): 'ext': 'mp4', 'title': 'Bau-„Leiter“', 'description': 'Auf der Baustelle ist viel passiert. Patrick und Lukas bekommen ein Update vom Bauleiter, erklären technische Grundlagen am „lebenden Objekt“ und stellen sich einer Onride-Challenge.', - 'uploader': 'MACK Media' + 'uploader': 'MACK Media', + 'timestamp': 1658997000, + 'upload_date': '20220728' } }] - def get_video_id(self, url): - return self._match_id(url) - - def get_video_data(self, url, video_id): - webpage = self._download_webpage(url, video_id) - next_data = self._html_search_regex(r'', webpage, 'next_data') - return json.loads(next_data)["props"]["pageProps"]["media"] + def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw): + return self._parse_json( + self._search_regex( + r'(?s)]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)', + webpage, 'next.js data', fatal=fatal, **kw), + video_id, transform_source=transform_source, fatal=fatal) def get_producer(self, video_data): - if "item" in video_data["studioDetails"]: - return video_data["studioDetails"]["item"]["title"] - else: - return "Veejoy" + return ( + strip_or_none( + try_get(video_data, lambda x: x['studioDetails']['item']['title'], compat_str)) + or 'Veejoy') def get_thumbnails(self, video_data): thumbnails = [] - thumb_3_4 = video_data["teaserImage"]["3_4"] - if thumb_3_4: - thumbnails.append({ - 'url': thumb_3_4["srcSet"][1].split(" ")[0], - }) - - thumb_16_9 = video_data["teaserImage"]["16_9"] - if thumb_16_9: - thumbnails.append({ - 'url': thumb_16_9["srcSet"][1].split(" ")[0], - }) + for res in ('3_4', '16_9'): + thumb = try_get(video_data, lambda x: x['teaserImage'][res], dict) + if not thumb: + continue + thumb = url_or_none(try_get(thumb, lambda x: x['srcSet'][1].split(' ')[0])) + if thumb: + thumbnails.append({ + 'id': res, + 'url': thumb, + }) return thumbnails def get_asset_ref(self, video_data): - for mediaAsset in video_data["mediaAssets"]: - if mediaAsset["type"] == "SOURCE": - return mediaAsset["assetReference"] + for mediaAsset in video_data['mediaAssets']: + if mediaAsset.get('type') == 'SOURCE': + return mediaAsset.get('assetReference') def get_asset_formats(self, video_data, video_id): - asset = self._download_json("https://www.veejoy.de/api/service/get-media-summary?mediaIri=" + self.get_asset_ref(video_data) + "&locale=en", video_id) - return asset["assetFormats"] + return self._download_json( + update_url_query('https://www.veejoy.de/api/service/get-media-summary', { + 'mediaIri': self.get_asset_ref(video_data), + 'locale': 'en' + }), + video_id).get('assetFormats') def get_original_file_url(self, video_data, video_id): for asset_format in self.get_asset_formats(video_data, video_id): - if asset_format["mimeType"] == "video/mp4": + if asset_format.get('mimeType') == 'video/mp4': return asset_format - def get_video_formats(self, asset_formats): + def get_video_formats(self, asset_formats, video_id): # This function is currently faulty and thus not used formats = [] + q = qualities(['hq', 'mq', 'lq']) + for asset_format in asset_formats: - if asset_format["mimeType"] == "application/vnd.apple.mpegurl": - formats.append({ - 'url': asset_format["contentUrl"], - 'width': asset_format["transcodingFormat"]["videoWidth"], - 'quality': asset_format["transcodingFormat"]["id"], - 'language': asset_format["language"], - }) + f_url = url_or_none(asset_format.get('contentUrl')) + if not f_url: + continue + ext = determine_ext(f_url) + transcodingFormat = try_get(asset_format, lambda x: x['transcodingFormat'], dict) or {} + + if transcodingFormat == {}: + continue + + label = strip_or_none(transcodingFormat.get('label') or '').split('-') + extra = ( + ('width', int_or_none(transcodingFormat.get('videoWidth'))), + ('quality', q(label[0])), + ('language', asset_format.get('language')), + ) + if ext == 'm3u8': + # expect 'mimeType': 'application/vnd.apple.mpegurl' + fmts = self._extract_m3u8_formats( + # if the yt-dl HLS downloader doesn't work: `entry_protocol='m3u8'` + f_url, video_id, ext='mp4', entry_protocol='m3u8', + m3u8_id=transcodingFormat.get('formatType'), fatal=False) + for f in fmts: + f.update((k, v) for k, v in extra if f.get(k) is None) + formats.extend(fmts) + else: + # expect 'mimeType': 'video/mp4' + fmt = {'url': f_url} + fmt.update(extra) + formats.append(fmt) return formats def _real_extract(self, url): - video_id = self.get_video_id(url) - video_data = self.get_video_data(url, video_id) + video_id = self._match_id(url) + video_data = self._search_nextjs_data(self._download_webpage(url, video_id), video_id).get('props').get('pageProps').get('media') + title = video_data.get('title') + final_url = self.get_original_file_url(video_data, video_id).get('contentUrl') producer = self.get_producer(video_data) thumbnails = self.get_thumbnails(video_data) - final_asset = self.get_original_file_url(video_data, video_id) return { - 'url': final_asset.get("contentUrl"), + 'url': final_url, 'id': video_id, - 'title': video_data.get("title"), - 'description': video_data.get("shortDescription"), - 'duration': video_data.get("mediaDuration"), + 'title': title, + 'timestamp': parse_iso8601(video_data.get('liveDate')), + 'description': strip_or_none(video_data.get('shortDescription')), + 'duration': int_or_none(video_data.get('mediaDuration')), 'uploader': producer, 'creator': producer, 'thumbnails': thumbnails,