From eb6e396bfb66965487ef1e7c50edbf6e28130462 Mon Sep 17 00:00:00 2001
From: Bart Broere <mail@bartbroere.eu>
Date: Tue, 5 Mar 2024 13:55:59 +0100
Subject: [PATCH] First version of a VPRO regex

---
 youtube_dl/extractor/npo.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index d8573d343..d48a4cda0 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -4,9 +4,7 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
+from ..utils import ExtractorError
 
 
 class NPOIE(InfoExtractor):
@@ -189,3 +187,29 @@ class ONIE(NPOIE):
             'title': video_id,
             'formats': formats,
         }
+
+
+class VPROIE(NPOIE):
+    IE_NAME = 'vpro'
+    IE_DESC = 'vpro.nl'
+    _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
+    _TESTS = [{
+        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
+    }]
+
+    def _real_extract(self, url):
+        video_id = url.rstrip('/').split('/')[-1]
+        page, _ = self._download_webpage_handle(url, video_id)
+        results = re.findall('data-media-id="(.+_.+)"\s', page)
+        formats = []
+        for result in results:
+            formats.extend(self._download_by_product_id(result, video_id))
+
+        if not formats:
+            raise ExtractorError('Could not find a POMS product id in the provided URL.')
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }