From eb6e396bfb66965487ef1e7c50edbf6e28130462 Mon Sep 17 00:00:00 2001
From: Bart Broere
Date: Tue, 5 Mar 2024 13:55:59 +0100
Subject: [PATCH] First version of a VPRO regex

---
Review notes (not part of the commit message):
- Regexes are raw strings, the dot in the host name is escaped, and the
  data-media-id capture no longer crosses the closing quote.
- Hunk counts and diffstat were updated for the added lines; the post-image
  blob id on the "index" line was not recomputed.

 youtube_dl/extractor/npo.py | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index d8573d343..d48a4cda0 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -4,9 +4,7 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
+from ..utils import ExtractorError
 
 
 class NPOIE(InfoExtractor):
@@ -189,3 +187,34 @@ class ONIE(NPOIE):
             'title': video_id,
             'formats': formats,
         }
+
+
+class VPROIE(NPOIE):
+    """Extract media referenced by data-media-id attributes on vpro.nl pages."""
+
+    IE_NAME = 'vpro'
+    IE_DESC = 'vpro.nl'
+    _VALID_URL = r'https?://(?:www\.)?vpro\.nl/.*'
+    _TESTS = [{
+        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
+    }]
+
+    def _real_extract(self, url):
+        # The last path segment of the URL doubles as both id and title.
+        video_id = url.rstrip('/').split('/')[-1]
+        page = self._download_webpage(url, video_id)
+        # Keep the capture inside the attribute's quotes; a greedy `.+`
+        # could run on into later attributes on the same line.
+        results = re.findall(r'data-media-id="([^"]+_[^"]+)"\s', page)
+        formats = []
+        for result in results:
+            formats.extend(self._download_by_product_id(result, video_id))
+
+        if not formats:
+            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }