From 907a09cbf71baa70a04f0dee80c16968f1cf2898 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Mon, 29 Aug 2022 11:56:06 +0200 Subject: [PATCH 1/6] [w24at] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/w24at.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/w24at.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 751fc38b6..32255d438 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1558,6 +1558,7 @@ from .wsj import ( WSJArticleIE, ) from .wwe import WWEIE +from .w24at import W24atIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE diff --git a/youtube_dl/extractor/w24at.py b/youtube_dl/extractor/w24at.py new file mode 100644 index 000000000..de4f46316 --- /dev/null +++ b/youtube_dl/extractor/w24at.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals, print_function + +from .common import InfoExtractor + + +class W24atIE(InfoExtractor): + _VALID_URL = r'https://(?:www\.)?w24\.at/Video/.*/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.w24.at/Video/Bewegung-macht-Spass-Folge-62-Kids-6/24828', + 'md5': '2cfa88aa93f7747a20567ca1ca4a9ab7', + 'info_dict': { + 'id': '24828', + 'ext': 'mp4', + 'title': 'Bewegung macht Spaß! - Folge 62: Kids 6 - W24', + 'description': 'Stefans Ziel ist es Kindern auch hinter den Bildschirmen zur Bewegung und zum Denksport zu animieren und das ganze mit Spaß und Köpfchen zu verbinden.' 
+ } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + media_server = self._html_search_regex(r'var\s*mediaServer\s*=\s*\{.*"vod"\s*:\s*"([^"]+)"', + webpage, "vod", "ms02.w24.at") + m3u8_path = self._html_search_regex(r"src:.*\+ '([^']+)'.*type:'application/x-mpegURL'", + webpage, "video") + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'formats': self._extract_m3u8_formats("https://%s%s" % (media_server, m3u8_path), video_id, 'mp4'), + 'extension': 'mp4' + } From 25fcfdf5662019f27ce34b94f9d1bfa0c026e8f7 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Mon, 29 Aug 2022 11:58:14 +0200 Subject: [PATCH 2/6] Remove unnecessary import --- youtube_dl/extractor/w24at.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/w24at.py b/youtube_dl/extractor/w24at.py index de4f46316..9f743c6c3 100644 --- a/youtube_dl/extractor/w24at.py +++ b/youtube_dl/extractor/w24at.py @@ -1,5 +1,5 @@ # coding: utf-8 -from __future__ import unicode_literals, print_function +from __future__ import unicode_literals from .common import InfoExtractor From 48f020cb9acf2909340e05d3df6635e8b0b0e195 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Tue, 30 Aug 2022 12:58:39 +0200 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: dirkf --- youtube_dl/extractor/w24at.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/w24at.py b/youtube_dl/extractor/w24at.py index 9f743c6c3..0b42ae446 100644 --- a/youtube_dl/extractor/w24at.py +++ b/youtube_dl/extractor/w24at.py @@ -26,8 +26,10 @@ class W24atIE(InfoExtractor): webpage, "video") return { 'id': video_id, - 'title': self._og_search_title(webpage), + 'title': re.sub(r'\s+-\sW24\s*$', '', self._og_search_title(webpage)), 'description': self._og_search_description(webpage), 'formats': 
self._extract_m3u8_formats("https://%s%s" % (media_server, m3u8_path), video_id, 'mp4'), - 'extension': 'mp4' + 'extension': 'mp4', + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader': self._og_search_property('site_name', webpage, fatal=False), } From 325427ce17afbc901fdc0e0e96b2e46ee9a98fcc Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Tue, 30 Aug 2022 13:34:55 +0200 Subject: [PATCH 4/6] In between --- youtube_dl/extractor/w24at.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/w24at.py b/youtube_dl/extractor/w24at.py index 0b42ae446..7f8311065 100644 --- a/youtube_dl/extractor/w24at.py +++ b/youtube_dl/extractor/w24at.py @@ -1,5 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor @@ -8,12 +9,14 @@ class W24atIE(InfoExtractor): _VALID_URL = r'https://(?:www\.)?w24\.at/Video/.*/(?P<id>[0-9]+)' _TEST = { 'url': 'https://www.w24.at/Video/Bewegung-macht-Spass-Folge-62-Kids-6/24828', - 'md5': '2cfa88aa93f7747a20567ca1ca4a9ab7', + 'md5': 'cc1bd6c334ed284a44528c6dbc9bc6ff', 'info_dict': { 'id': '24828', 'ext': 'mp4', - 'title': 'Bewegung macht Spaß! - Folge 62: Kids 6 - W24', - 'description': 'Stefans Ziel ist es Kindern auch hinter den Bildschirmen zur Bewegung und zum Denksport zu animieren und das ganze mit Spaß und Köpfchen zu verbinden.' + 'title': 'Bewegung macht Spaß! 
- Folge 62: Kids 6', + 'description': 'Stefans Ziel ist es Kindern auch hinter den Bildschirmen zur Bewegung und zum Denksport zu animieren und das ganze mit Spaß und Köpfchen zu verbinden.', + 'thumbnail': r're:.*\.jpg$', + 'uploader': 'W24' } } @@ -22,13 +25,20 @@ class W24atIE(InfoExtractor): webpage = self._download_webpage(url, video_id) media_server = self._html_search_regex(r'var\s*mediaServer\s*=\s*\{.*"vod"\s*:\s*"([^"]+)"', webpage, "vod", "ms02.w24.at") + mp4_path = self._html_search_regex(r"src:.*\+ '([^']+)'.*type:'video/mp4'", + webpage, "mp4_video") m3u8_path = self._html_search_regex(r"src:.*\+ '([^']+)'.*type:'application/x-mpegURL'", - webpage, "video") + webpage, "m3u8") + formats = [] + if mp4_path: + formats.append({'url': "https://%s%s" % (media_server, mp4_path)}) + formats.extend(self._extract_m3u8_formats("https://%s%s" % (media_server, m3u8_path), video_id, 'mp4', entry_protocol='m3u8_native')) + self._sort_formats(formats) return { 'id': video_id, 'title': re.sub(r'\s+-\sW24\s*$', '', self._og_search_title(webpage)), 'description': self._og_search_description(webpage), - 'formats': self._extract_m3u8_formats("https://%s%s" % (media_server, m3u8_path), video_id, 'mp4'), + 'formats': formats, 'extension': 'mp4', 'thumbnail': self._og_search_thumbnail(webpage), 'uploader': self._og_search_property('site_name', webpage, fatal=False), From 7b214829075a7cfe6de62d523632cdc2a7a7a887 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Tue, 30 Aug 2022 16:46:22 +0200 Subject: [PATCH 5/6] Should work now --- youtube_dl/extractor/w24at.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/w24at.py b/youtube_dl/extractor/w24at.py index 7f8311065..eabe81349 100644 --- a/youtube_dl/extractor/w24at.py +++ b/youtube_dl/extractor/w24at.py @@ -9,7 +9,7 @@ class W24atIE(InfoExtractor): _VALID_URL = r'https://(?:www\.)?w24\.at/Video/.*/(?P<id>[0-9]+)' _TEST = { 'url': 
'https://www.w24.at/Video/Bewegung-macht-Spass-Folge-62-Kids-6/24828', - 'md5': 'cc1bd6c334ed284a44528c6dbc9bc6ff', + 'md5': '16e6f1c5d4a0d54d420e3d9d122660a1', 'info_dict': { 'id': '24828', 'ext': 'mp4', @@ -26,20 +26,19 @@ class W24atIE(InfoExtractor): media_server = self._html_search_regex(r'var\s*mediaServer\s*=\s*\{.*"vod"\s*:\s*"([^"]+)"', webpage, "vod", "ms02.w24.at") mp4_path = self._html_search_regex(r"src:.*\+ '([^']+)'.*type:'video/mp4'", - webpage, "mp4_video") + webpage, "mp4_path") m3u8_path = self._html_search_regex(r"src:.*\+ '([^']+)'.*type:'application/x-mpegURL'", - webpage, "m3u8") + webpage, "m3u8_path") formats = [] if mp4_path: - formats.append({'url': "https://%s%s" % (media_server, mp4_path)}) - formats.extend(self._extract_m3u8_formats("https://%s%s" % (media_server, m3u8_path), video_id, 'mp4', entry_protocol='m3u8_native')) + formats.append({'url': "https://%s%s" % (media_server, mp4_path), 'ext': 'mp4'}) + formats.extend(self._extract_m3u8_formats("https://%s%s" % (media_server, m3u8_path), video_id, ext='mp4')) self._sort_formats(formats) return { 'id': video_id, 'title': re.sub(r'\s+-\sW24\s*$', '', self._og_search_title(webpage)), 'description': self._og_search_description(webpage), 'formats': formats, - 'extension': 'mp4', 'thumbnail': self._og_search_thumbnail(webpage), 'uploader': self._og_search_property('site_name', webpage, fatal=False), } From 8c8b07c4e9311d08e5de2b305f41bafeeb06e4ac Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 3 Feb 2023 04:11:27 +0000 Subject: [PATCH 6/6] Small update, force CI --- youtube_dl/extractor/w24at.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/w24at.py b/youtube_dl/extractor/w24at.py index eabe81349..b273c4a57 100644 --- a/youtube_dl/extractor/w24at.py +++ b/youtube_dl/extractor/w24at.py @@ -1,5 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals + import re from .common import InfoExtractor