from __future__ import unicode_literals import json import re from .common import InfoExtractor from ..utils import ExtractorError class NPOIE(InfoExtractor): IE_NAME = 'npo' IE_DESC = 'npo.nl' _VALID_URL = r'''(?x) (?: https?:// (?:www\.)? (?: npo\.nl/(?:[^/]+/)* ) ) (?P[^/?#]+) ''' _TESTS = [{ 'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/', # TODO fill in other test attributes }, { 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', 'info_dict': { 'id': 'VARA_101191800', 'ext': 'm4v', 'title': 'De Mega Mike & Mega Thomas show: The best of.', 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', 'upload_date': '20090227', 'duration': 2400, }, 'skip': 'Video gone', }, { 'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika', 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', 'info_dict': { 'id': 'VPWON_1169289', 'ext': 'm4v', 'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika', 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', 'duration': 3000, }, }] def _get_token(self, video_id): return self._download_json( 'https://npo.nl/start/api/domain/player-token?productId=%s' % video_id, video_id, note='Downloading token')['token'] def _real_extract(self, url): # You might want to use removesuffix here, # but removesuffix is introduced in Python 3.9 # and youtube-dl supports Python 3.2+ if url.endswith('/afspelen'): url = url[:-9] elif url.endswith('/afspelen/'): url = url[:-10] url = url.rstrip('/') slug = url.split('/')[-1] page = self._download_webpage(url, slug, 'Finding productId using slug: %s' % slug) # TODO find out what proper HTML parsing utilities are available in youtube-dl next_data = page.split('')[0] # TODO The data in this script tag feels like GraphQL, so there might be an easier way # to get the product id, maybe using a GraphQL endpoint next_data = self._parse_json(next_data, slug) product_id, title, description, thumbnail = None, None, None, None for query in next_data['props']['pageProps']['dehydratedState']['queries']: if isinstance(query['state']['data'], list): for entry in query['state']['data']: if entry['slug'] == slug: product_id = entry.get('productId') title = entry.get('title') synopsis = entry.get('synopsis', {}) description = (synopsis.get('long') or synopsis.get('short') or synopsis.get('brief')) thumbnails = entry.get('images') for thumbnail_entry in thumbnails: if 'url' in thumbnail_entry: thumbnail = thumbnail_entry.get('url') if not product_id: raise ExtractorError('No productId found for slug: %s' % slug) formats = self._download_by_product_id(product_id, slug, url) return { 'id': slug, 'formats': formats, 'title': title or slug, 'description': description, 'thumbnail': thumbnail, # TODO fill in other metadata that's available } def _download_by_product_id(self, product_id, slug, url=None): token = self._get_token(product_id) formats = [] for profile in ( 'dash', # 'hls', # TODO test what needs to change for 'hls' support ): stream_link = self._download_json( 'https://prod.npoplayer.nl/stream-link', video_id=slug, data=json.dumps({ 'profileName': profile, 'drmType': 'widevine', 'referrerUrl': url or '', }).encode('utf8'), headers={ 'Authorization': token, 'Content-Type': 'application/json', } ) stream_url = stream_link.get('stream', {}).get('streamURL') formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False)) return formats class BNNVaraIE(NPOIE): IE_NAME = 'bnnvara' IE_DESC = 'bnnvara.nl' _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*' _TESTS = [{ 'url': 'https://www.bnnvara.nl/videos/27455', }] def _real_extract(self, url): url = url.rstrip('/') video_id = url.split('/')[-1] media = self._download_json('https://api.bnnvara.nl/bff/graphql', video_id, data=json.dumps( { 'operationName': 'getMedia', 'variables': { 'id': video_id, 'hasAdConsent': False, 'atInternetId': 70 }, 'query': 'query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {\n player(\n id: $id\n mediaUrl: $mediaUrl\n hasAdConsent: $hasAdConsent\n atInternetId: $atInternetId\n ) {\n ... on PlayerSucces {\n brand {\n name\n slug\n broadcastsEnabled\n __typename\n }\n title\n programTitle\n pomsProductId\n broadcasters {\n name\n __typename\n }\n duration\n classifications {\n title\n imageUrl\n type\n __typename\n }\n image {\n title\n url\n __typename\n }\n cta {\n title\n url\n __typename\n }\n genres {\n name\n __typename\n }\n subtitles {\n url\n language\n __typename\n }\n sources {\n name\n url\n ratio\n __typename\n }\n type\n token\n __typename\n }\n ... on PlayerError {\n error\n __typename\n }\n __typename\n }\n}' }).encode('utf8'), headers={ 'Content-Type': 'application/json', }) product_id = media.get('data', {}).get('player', {}).get('pomsProductId') formats = self._download_by_product_id(product_id, video_id) return { 'id': product_id, 'title': media.get('data', {}).get('player', {}).get('title'), 'formats': formats, 'thumbnail': media.get('data', {}).get('player', {}).get('image').get('url'), } class ONIE(NPOIE): IE_NAME = 'on' IE_DESC = 'ongehoordnederland.tv' _VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*' _TESTS = [{ 'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/', }] def _real_extract(self, url): video_id = url.rstrip('/').split('/')[-1] page, _ = self._download_webpage_handle(url, video_id) results = re.findall("page: '(.+)'", page) formats = [] for result in results: formats.extend(self._download_by_product_id(result, video_id)) if not formats: raise ExtractorError('Could not find a POMS product id in the provided URL.') return { 'id': video_id, 'title': video_id, 'formats': formats, } class VPROIE(NPOIE): IE_NAME = 'vpro' IE_DESC = 'vpro.nl' _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*' _TESTS = [{ 'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html', }] def _real_extract(self, url): video_id = url.rstrip('/').split('/')[-1] page, _ = self._download_webpage_handle(url, video_id) results = re.findall('data-media-id="(.+_.+)"\s', page) formats = [] for result in results: formats.extend(self._download_by_product_id(result, video_id)) break # TODO find a better solution, VPRO pages can have multiple videos embedded if not formats: raise ExtractorError('Could not find a POMS product id in the provided URL.') return { 'id': video_id, 'title': video_id, 'formats': formats, } class ZAPPIE(NPOIE): IE_NAME = 'zapp' IE_DESC = 'zapp.nl' _VALID_URL = r'https?://(?:www\.)?zapp.nl/.*' _TESTS = [{ 'url': 'https://www.zapp.nl/programmas/zappsport/gemist/AT_300003973', }] def _real_extract(self, url): video_id = url.rstrip('/').split('/')[-1] formats = self._download_by_product_id(url, video_id) return { 'id': video_id, 'title': video_id, 'formats': formats, }