1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-18 16:26:23 +00:00

[veejoy] applied suggested changes from pr

This commit is contained in:
Thilo Billerbeck 2022-08-17 21:06:59 +02:00
parent c191cda21d
commit 3b3f2dd6e1
2 changed files with 94 additions and 49 deletions

View file

@ -1416,6 +1416,7 @@ from .ustudio import (
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veehd import VeeHDIE from .veehd import VeeHDIE
from .veejoy import VeejoyIE
from .veoh import VeohIE from .veoh import VeohIE
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
@ -1661,4 +1662,3 @@ from .zingmp3 import (
) )
from .zoom import ZoomIE from .zoom import ZoomIE
from .zype import ZypeIE from .zype import ZypeIE
from .veejoy import VeejoyIE

View file

@ -1,8 +1,19 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
parse_iso8601,
qualities,
strip_or_none,
try_get,
update_url_query,
url_or_none,
)
class VeejoyIE(InfoExtractor): class VeejoyIE(InfoExtractor):
@ -14,7 +25,9 @@ class VeejoyIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'On-ride Tyrol Log Flume', 'title': 'On-ride Tyrol Log Flume',
'description': 'Through the magical world of diamonds and straight into the cool water. Experience a different kind of water slide with the Tyrol Log Flume. One of the oldest and most popular attractions in the park!', 'description': 'Through the magical world of diamonds and straight into the cool water. Experience a different kind of water slide with the Tyrol Log Flume. One of the oldest and most popular attractions in the park!',
'uploader': 'MACK Media' 'uploader': 'MACK Media',
'upload_date': '20210923',
'timestamp': 1632388920
} }
}, { }, {
'url': 'https://www.veejoy.de/en/movies/off-to-rulantica', 'url': 'https://www.veejoy.de/en/movies/off-to-rulantica',
@ -23,7 +36,9 @@ class VeejoyIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Off to Rulantica', 'title': 'Off to Rulantica',
'description': 'Rocking through the water on round boats, creating splashy fun with water cannons and then, sliding down Svalgurok on ten different slides: Soaking wet water fun is calling.', 'description': 'Rocking through the water on round boats, creating splashy fun with water cannons and then, sliding down Svalgurok on ten different slides: Soaking wet water fun is calling.',
'uploader': 'Veejoy' 'uploader': 'Veejoy',
'upload_date': '20220811',
'timestamp': 1660206600
} }
}, { }, {
'url': 'https://www.veejoy.de/de/series/o-the-construction-documentary/the-building-site-grows', 'url': 'https://www.veejoy.de/de/series/o-the-construction-documentary/the-building-site-grows',
@ -32,83 +47,113 @@ class VeejoyIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bau-„Leiter“', 'title': 'Bau-„Leiter“',
'description': 'Auf der Baustelle ist viel passiert. Patrick und Lukas bekommen ein Update vom Bauleiter, erklären technische Grundlagen am „lebenden Objekt“ und stellen sich einer Onride-Challenge.', 'description': 'Auf der Baustelle ist viel passiert. Patrick und Lukas bekommen ein Update vom Bauleiter, erklären technische Grundlagen am „lebenden Objekt“ und stellen sich einer Onride-Challenge.',
'uploader': 'MACK Media' 'uploader': 'MACK Media',
'timestamp': 1658997000,
'upload_date': '20220728'
} }
}] }]
def get_video_id(self, url): def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
return self._match_id(url) return self._parse_json(
self._search_regex(
def get_video_data(self, url, video_id): r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
webpage = self._download_webpage(url, video_id) webpage, 'next.js data', fatal=fatal, **kw),
next_data = self._html_search_regex(r'<script id="__NEXT_DATA__" type="application/json">([^<]+)</script>', webpage, 'next_data') video_id, transform_source=transform_source, fatal=fatal)
return json.loads(next_data)["props"]["pageProps"]["media"]
def get_producer(self, video_data): def get_producer(self, video_data):
if "item" in video_data["studioDetails"]: return (
return video_data["studioDetails"]["item"]["title"] strip_or_none(
else: try_get(video_data, lambda x: x['studioDetails']['item']['title'], compat_str))
return "Veejoy" or 'Veejoy')
def get_thumbnails(self, video_data): def get_thumbnails(self, video_data):
thumbnails = [] thumbnails = []
thumb_3_4 = video_data["teaserImage"]["3_4"] for res in ('3_4', '16_9'):
if thumb_3_4: thumb = try_get(video_data, lambda x: x['teaserImage'][res], dict)
if not thumb:
continue
thumb = url_or_none(try_get(thumb, lambda x: x['srcSet'][1].split(' ')[0]))
if thumb:
thumbnails.append({ thumbnails.append({
'url': thumb_3_4["srcSet"][1].split(" ")[0], 'id': res,
}) 'url': thumb,
thumb_16_9 = video_data["teaserImage"]["16_9"]
if thumb_16_9:
thumbnails.append({
'url': thumb_16_9["srcSet"][1].split(" ")[0],
}) })
return thumbnails return thumbnails
def get_asset_ref(self, video_data): def get_asset_ref(self, video_data):
for mediaAsset in video_data["mediaAssets"]: for mediaAsset in video_data['mediaAssets']:
if mediaAsset["type"] == "SOURCE": if mediaAsset.get('type') == 'SOURCE':
return mediaAsset["assetReference"] return mediaAsset.get('assetReference')
def get_asset_formats(self, video_data, video_id): def get_asset_formats(self, video_data, video_id):
asset = self._download_json("https://www.veejoy.de/api/service/get-media-summary?mediaIri=" + self.get_asset_ref(video_data) + "&locale=en", video_id) return self._download_json(
return asset["assetFormats"] update_url_query('https://www.veejoy.de/api/service/get-media-summary', {
'mediaIri': self.get_asset_ref(video_data),
'locale': 'en'
}),
video_id).get('assetFormats')
def get_original_file_url(self, video_data, video_id): def get_original_file_url(self, video_data, video_id):
for asset_format in self.get_asset_formats(video_data, video_id): for asset_format in self.get_asset_formats(video_data, video_id):
if asset_format["mimeType"] == "video/mp4": if asset_format.get('mimeType') == 'video/mp4':
return asset_format return asset_format
def get_video_formats(self, asset_formats): def get_video_formats(self, asset_formats, video_id):
# This function is currently faulty and thus not used # This function is currently faulty and thus not used
formats = [] formats = []
q = qualities(['hq', 'mq', 'lq'])
for asset_format in asset_formats: for asset_format in asset_formats:
if asset_format["mimeType"] == "application/vnd.apple.mpegurl": f_url = url_or_none(asset_format.get('contentUrl'))
formats.append({ if not f_url:
'url': asset_format["contentUrl"], continue
'width': asset_format["transcodingFormat"]["videoWidth"], ext = determine_ext(f_url)
'quality': asset_format["transcodingFormat"]["id"], transcodingFormat = try_get(asset_format, lambda x: x['transcodingFormat'], dict) or {}
'language': asset_format["language"],
}) if transcodingFormat == {}:
continue
label = strip_or_none(transcodingFormat.get('label') or '').split('-')
extra = (
('width', int_or_none(transcodingFormat.get('videoWidth'))),
('quality', q(label[0])),
('language', asset_format.get('language')),
)
if ext == 'm3u8':
# expect 'mimeType': 'application/vnd.apple.mpegurl'
fmts = self._extract_m3u8_formats(
# if the yt-dl HLS downloader doesn't work: `entry_protocol='m3u8'`
f_url, video_id, ext='mp4', entry_protocol='m3u8',
m3u8_id=transcodingFormat.get('formatType'), fatal=False)
for f in fmts:
f.update((k, v) for k, v in extra if f.get(k) is None)
formats.extend(fmts)
else:
# expect 'mimeType': 'video/mp4'
fmt = {'url': f_url}
fmt.update(extra)
formats.append(fmt)
return formats return formats
def _real_extract(self, url): def _real_extract(self, url):
video_id = self.get_video_id(url) video_id = self._match_id(url)
video_data = self.get_video_data(url, video_id) video_data = self._search_nextjs_data(self._download_webpage(url, video_id), video_id).get('props').get('pageProps').get('media')
title = video_data.get('title')
final_url = self.get_original_file_url(video_data, video_id).get('contentUrl')
producer = self.get_producer(video_data) producer = self.get_producer(video_data)
thumbnails = self.get_thumbnails(video_data) thumbnails = self.get_thumbnails(video_data)
final_asset = self.get_original_file_url(video_data, video_id)
return { return {
'url': final_asset.get("contentUrl"), 'url': final_url,
'id': video_id, 'id': video_id,
'title': video_data.get("title"), 'title': title,
'description': video_data.get("shortDescription"), 'timestamp': parse_iso8601(video_data.get('liveDate')),
'duration': video_data.get("mediaDuration"), 'description': strip_or_none(video_data.get('shortDescription')),
'duration': int_or_none(video_data.get('mediaDuration')),
'uploader': producer, 'uploader': producer,
'creator': producer, 'creator': producer,
'thumbnails': thumbnails, 'thumbnails': thumbnails,