From 3525025a6fc58891f52a73f77f63fc96f8c54868 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Mon, 18 Nov 2024 02:28:15 +0000
Subject: [PATCH] Update extractor for current site behaviour

---
Review notes (text between the "---" line and the diff is not part of the
commit message):
 * Line breaks and indentation were rebuilt from the hunk headers; each
   hunk's old/new line counts match its "@@" header and the diffstat.
 * Named groups were restored in the regexes (id, path, url, thumbnail),
   using the group names the code itself requests via .group()/group=.
 * The <title>...</title> tags around the title regex are presumed to have
   been stripped together with the other angle-bracket text. TODO: confirm
   against the real voe.py, because that regex is hunk context and has to
   match the target file exactly.
 * Fixes in added lines: balanced quoting in ('mp4', 'hls'); sources[fmt]
   instead of the literal key 'fmt'; video_path instead of the undefined
   name path; the base64 result is decoded to text before URL validation,
   since b64decode() returns bytes.
 * NOTE(review): the From: header carries no e-mail address; add the
   author's address before feeding this to git-am.

 youtube_dl/extractor/voe.py | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/voe.py b/youtube_dl/extractor/voe.py
index 870a6b1aa..cbf30f19a 100644
--- a/youtube_dl/extractor/voe.py
+++ b/youtube_dl/extractor/voe.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import base64
+
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -12,8 +14,8 @@ from ..utils import (
 class VOEIE(InfoExtractor):
     IE_NAME = 'voe'
     IE_DESC = 'VOE.SX'
-    _VALID_URL = r'https?://voe\.sx/(e/)?(?P<id>[a-z0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://voe\.sx(?P<path>/(e/)?(?P<id>[a-z0-9]+))'
+    _TESTS = [{
         'url': 'https://voe.sx/e/ng7ja5n5n2y8',
         'info_dict': {
             'id': 'ng7ja5n5n2y8',
@@ -21,29 +23,39 @@ class VOEIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$',
             'ext': 'm3u8',
         },
-    }
+    }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        video_id, video_path = self._match_valid_url(url).group('id', 'path')
 
         webpage = self._download_webpage(
             'https://voe.sx/e/%s' % video_id, video_id)
 
-        sources = self._parse_json(
-            self._search_regex(r'\bsources\s*=\s*(\{[^}]+\})', webpage, 'sources'),
-            video_id, transform_source=js_to_json)
+        player_url = self._search_regex(
+            r'''("|')(?P<url>https://(?!voe\.sx/)[^/]+%s)\1\s*;''' % (video_path,),
+            webpage, 'redirect', group='url', default=None)
+        if player_url:
+            webpage = self._download_webpage(
+                player_url, video_id, note='Redirecting to player page')
+
+
+        sources = self._search_json(
+            r'\bsources\s*=', webpage, 'sources', video_id, transform_source=js_to_json)
 
         title = self._search_regex(
             r'<title>(?:Watch\s+)?(?P<title>.+?)(?:-\s+VOE\s+\|.+)?</title>',
             webpage, 'title', group='title')
 
         formats = []
-
-        f_url = url_or_none(sources.get('hls'))
+        for fmt in ('mp4', 'hls'):
+            if fmt not in sources:
+                continue
+            sources[fmt] = url_or_none(sources[fmt]) or url_or_none(base64.b64decode(sources[fmt]).decode('utf-8', 'replace'))
+        f_url = sources.get('hls')
         if f_url:
             formats.extend(self._extract_m3u8_formats(
                 f_url, video_id, entry_protocol='m3u8_native', fatal=False))
-        f_url = url_or_none(sources.get('mp4'))
+        f_url = sources.get('mp4')
         if f_url:
             formats.append({
                 'url': f_url,
@@ -54,7 +66,7 @@ class VOEIE(InfoExtractor):
         self._sort_formats(formats)
 
         thumbnail = url_or_none(self._search_regex(
-            r'(?:VOEPlayer.|data-)poster\s*=\s*(["\'])(?P<thumbnail>(?:(?!\1)\S)+)\1',
+            r'''(?:VOEPlayer\s*\.\s*|data-)poster\s*=\s*("|')(?P<thumbnail>(?:(?!\1)\S)+)\1''',
             webpage, 'thumbnail', group='thumbnail', default=None))
 
         return {