2014-06-26 18:30:44 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2024-03-01 13:12:51 +00:00
|
|
|
import json
|
2015-07-16 17:54:43 +00:00
|
|
|
|
2015-02-12 19:36:54 +00:00
|
|
|
from .common import InfoExtractor
|
2014-06-26 18:30:44 +00:00
|
|
|
from ..utils import (
|
2017-03-01 15:14:46 +00:00
|
|
|
ExtractorError,
|
2014-06-26 18:30:44 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
2024-03-01 12:24:48 +00:00
|
|
|
class NPOIE(InfoExtractor):
    """Extractor for programme pages on npo.nl.

    The product id and basic metadata are read from the page's embedded
    ``__NEXT_DATA__`` JSON; a player token is then exchanged for a DASH
    stream link from which the formats are extracted.
    """

    IE_NAME = 'npo'
    IE_DESC = 'npo.nl'
    # NOTE(review): the ``npo:`` shorthand is still matched here, but
    # _real_extract downloads ``url`` as a web page, so confirm whether
    # that form is meant to remain supported.
    _VALID_URL = r'''(?x)
                    (?:
                        npo:|
                        https?://
                            (?:www\.)?
                            (?:
                                npo\.nl/(?:[^/]+/)*
                            )
                        )
                        (?P<id>[^/?#]+)
                '''

    _TESTS = [{
        'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
        # TODO fill in other test attributes
    }, {
        'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
        'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
        'info_dict': {
            'id': 'VARA_101191800',
            'ext': 'm4v',
            'title': 'De Mega Mike & Mega Thomas show: The best of.',
            'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
            'upload_date': '20090227',
            'duration': 2400,
        },
        'skip': 'Video gone',
    }, {
        'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
        'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
        'info_dict': {
            'id': 'VPWON_1169289',
            'ext': 'm4v',
            'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika',
            'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
            'upload_date': '20130225',
            'duration': 3000,
        },
    }]

    # Path segment the site appends to a programme URL for its player view.
    _PLAYER_SUFFIX = '/afspelen'

    # Stream profiles requested from the player API.
    _STREAM_PROFILES = (
        'dash',
        # 'hls',  # TODO test what needs to change for 'hls' support
    )

    def _get_token(self, video_id):
        """Return the player token for the given product id.

        Raises ExtractorError when the API response carries no token.
        """
        response = self._download_json(
            'https://npo.nl/start/api/domain/player-token?productId=%s' % video_id,
            video_id,
            note='Downloading token')
        token = response.get('token') if isinstance(response, dict) else None
        if not token:
            raise ExtractorError(
                'No player token returned for productId: %s' % video_id)
        return token

    @classmethod
    def _normalize_url(cls, url):
        """Return url without query, fragment, trailing '/' or player suffix."""
        # The last path segment is matched against slugs in the page data,
        # so nothing after '?' or '#' may leak into it.
        url = url.split('#')[0].split('?')[0].rstrip('/')
        # str.removesuffix needs Python 3.9; older interpreters must keep
        # working, hence the manual slice.
        if url.endswith(cls._PLAYER_SUFFIX):
            url = url[:-len(cls._PLAYER_SUFFIX)].rstrip('/')
        return url

    @staticmethod
    def _find_entry(next_data, slug):
        """Return the first entry for slug that has a productId, else None."""
        try:
            queries = next_data['props']['pageProps']['dehydratedState']['queries']
        except (KeyError, TypeError):
            return None
        if not isinstance(queries, list):
            return None
        for query in queries:
            state = query.get('state') if isinstance(query, dict) else None
            data = state.get('data') if isinstance(state, dict) else None
            # Only list-valued query results hold per-episode entries.
            if not isinstance(data, list):
                continue
            for entry in data:
                if not isinstance(entry, dict):
                    continue
                if entry.get('slug') == slug and entry.get('productId'):
                    return entry
        return None

    @staticmethod
    def _pick_description(entry):
        """Return the most detailed synopsis text available, or None."""
        synopsis = entry.get('synopsis')
        if not isinstance(synopsis, dict):
            return None
        return (
            synopsis.get('long')
            or synopsis.get('short')
            or synopsis.get('brief')
        )

    @staticmethod
    def _pick_thumbnail(entry):
        """Return the URL of the last listed image that has one, or None."""
        thumbnail = None
        for image in entry.get('images') or []:
            if isinstance(image, dict) and image.get('url'):
                thumbnail = image['url']
        return thumbnail

    def _real_extract(self, url):
        """Build the info dict for an npo.nl programme URL.

        Raises ExtractorError when the page data holds no product id for
        the slug derived from the URL.
        """
        url = self._normalize_url(url)
        slug = url.split('/')[-1]
        page = self._download_webpage(url, slug, 'Finding productId using slug: %s' % slug)
        # TODO The data in this script tag feels like GraphQL, so there might
        # be an easier way to get the product id, maybe using a GraphQL endpoint
        next_data = self._parse_json(
            self._search_regex(
                r'(?s)<script[^>]+\bid="__NEXT_DATA__"[^>]*>(.+?)</script>',
                page, 'Next.js data'),
            slug)

        entry = self._find_entry(next_data, slug)
        if not entry:
            raise ExtractorError('No productId found for slug: %s' % slug)

        token = self._get_token(entry['productId'])

        formats = []
        for profile in self._STREAM_PROFILES:
            stream_link = self._download_json(
                'https://prod.npoplayer.nl/stream-link', video_id=slug,
                data=json.dumps({
                    'profileName': profile,
                    'drmType': 'widevine',
                    'referrerUrl': url,
                }).encode('utf8'),
                headers={
                    'Authorization': token,
                    'Content-Type': 'application/json',
                }
            )
            stream = stream_link.get('stream') if isinstance(stream_link, dict) else None
            stream_url = stream.get('streamURL') if isinstance(stream, dict) else None
            # Without a manifest URL there is nothing to parse for this profile.
            if not stream_url:
                continue
            formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False))

        return {
            'id': slug,
            'formats': formats,
            'title': entry.get('title') or slug,
            'description': self._pick_description(entry),
            'thumbnail': self._pick_thumbnail(entry),
            # TODO fill in other metadata that's available
        }
|