From 9aeb231113cd4e3ab1645f259acddc0cbc4e62f8 Mon Sep 17 00:00:00 2001 From: fourjr <28086837+fourjr@users.noreply.github.com> Date: Sat, 21 Nov 2020 23:01:32 +0800 Subject: [PATCH 1/5] [ChannelNewsAsia] Add new extractor --- youtube_dl/extractor/channelnewsasia.py | 44 +++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/channelnewsasia.py diff --git a/youtube_dl/extractor/channelnewsasia.py b/youtube_dl/extractor/channelnewsasia.py new file mode 100644 index 000000000..654e537d6 --- /dev/null +++ b/youtube_dl/extractor/channelnewsasia.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re + +from .common import InfoExtractor + + +class ChannelNewsAsiaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?channelnewsasia\.com/(?:(?:-|\w|\d)+)/(?:(?:-|\w|\d)+)/(?P<id>(?:-|\w|\d)+)' + _TESTS = [ + { + 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech', + 'md5': 'a8ba47ac856fccb6213c74f1d82eeb3d', + 'info_dict': { + 'id': '9ldHdzajE6gEsQce6-K8eVvYNSAgY3fg', + 'ext': 'mp4', + 'title': 'Home', + 'description': 'md5:03740111008a32416327f07dbbc5526c', + }, + }, + { + 'url': 'https://www.channelnewsasia.com/news/asia/removing-the-negative-influences-of-religion-in-tibet-video-13604084', + 'md5': 'ed846cfca037823fa6d3d0d7af8a4e8f', + 'info_dict': { + 'id': 'ljZjd0ajE6NNMhVJ3Gb-QfL1l0p-qW6-', + 'ext': 'mp4', + 'title': "Removing the 'negative influences of religion' in Tibet | Video", + 'description': 'md5:777989926133319de6f6501372175fbf', + }, + } + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + url_obj = ( + re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL) + ) + + ooyala_id = url_obj.group('id') + return self.url_result( + 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id + ) diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index 5691c4cba..0e98f4743 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -184,6 +184,7 @@ from .ceskatelevize import ( CeskaTelevizePoradyIE, ) from .channel9 import Channel9IE +from .channelnewsasia import ChannelNewsAsiaIE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE from .chilloutzone import ChilloutzoneIE From 6ee229b4f1858621802dd8efe7d2900aac587b2d Mon Sep 17 00:00:00 2001 From: fourjr <28086837+fourjr@users.noreply.github.com> Date: Sun, 22 Nov 2020 00:02:27 +0800 Subject: [PATCH 2/5] [ChannelNewsAsia] add fallback --- youtube_dl/extractor/channelnewsasia.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/channelnewsasia.py b/youtube_dl/extractor/channelnewsasia.py index 654e537d6..457831a42 100644 --- a/youtube_dl/extractor/channelnewsasia.py +++ b/youtube_dl/extractor/channelnewsasia.py @@ -10,12 +10,12 @@ class ChannelNewsAsiaIE(InfoExtractor): _TESTS = [ { 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech', - 'md5': 'a8ba47ac856fccb6213c74f1d82eeb3d', + 'md5': 'ed9ed143052f0da3ee8a8fa59ba16870', 'info_dict': { - 'id': '9ldHdzajE6gEsQce6-K8eVvYNSAgY3fg', + 'id': 'w0ZWRzajE6qDPXDb7DSeaOCJ3bJ3GDqC', 'ext': 'mp4', - 'title': 'Home', - 'description': 'md5:03740111008a32416327f07dbbc5526c', + 'title': 'Wizards Of Tech_2020_0_1_Body', + 'description': 'md5:b3882dd00e329e623a179465de9f5478', }, }, { @@ -36,6 +36,7 @@ class ChannelNewsAsiaIE(InfoExtractor): webpage = self._download_webpage(url, display_id) url_obj = ( re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL) + or re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL) ) ooyala_id = url_obj.group('id') From 357a506c2050ebc3089869fbf1b4fca5693e1453 Mon Sep 17 00:00:00 2001 From: fourjr <28086837+fourjr@users.noreply.github.com> Date: Sun, 22 Nov 2020 00:03:40 +0800 Subject: [PATCH 3/5] [ChannelNewsAsia] follow 
conventions --- youtube_dl/extractor/channelnewsasia.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/channelnewsasia.py b/youtube_dl/extractor/channelnewsasia.py index 457831a42..4739bbb58 100644 --- a/youtube_dl/extractor/channelnewsasia.py +++ b/youtube_dl/extractor/channelnewsasia.py @@ -36,8 +36,7 @@ class ChannelNewsAsiaIE(InfoExtractor): webpage = self._download_webpage(url, display_id) url_obj = ( re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL) - or re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL) - ) + or re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL)) ooyala_id = url_obj.group('id') return self.url_result( From 59e9536b9838c4616330ceea54036a2846eb19a3 Mon Sep 17 00:00:00 2001 From: fourjr <28086837+fourjr@users.noreply.github.com> Date: Sun, 22 Nov 2020 18:03:12 +0800 Subject: [PATCH 4/5] [channelnewsasia] fix tests, more relaxed regex --- youtube_dl/extractor/channelnewsasia.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/channelnewsasia.py b/youtube_dl/extractor/channelnewsasia.py index 4739bbb58..7a59192d7 100644 --- a/youtube_dl/extractor/channelnewsasia.py +++ b/youtube_dl/extractor/channelnewsasia.py @@ -9,7 +9,7 @@ class ChannelNewsAsiaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?channelnewsasia\.com/(?:(?:-|\w|\d)+)/(?:(?:-|\w|\d)+)/(?P<id>(?:-|\w|\d)+)' _TESTS = [ { - 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech', + 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech/wizards-of-tech-body-13515106', 'md5': 'ed9ed143052f0da3ee8a8fa59ba16870', 'info_dict': { 'id': 'w0ZWRzajE6qDPXDb7DSeaOCJ3bJ3GDqC', @@ -34,11 +34,15 @@ class ChannelNewsAsiaIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - url_obj = ( - re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL) - or re.search(r'(?:\d|\w|-)+)".*', webpage, 
flags=re.DOTALL)) + + ooyala_id = ( + self._search_regex( + r'id="ooyala-\d+-((?:\d|\w|-)+)--\d+', webpage, 'ooyala id', + default=None, fatal=False) + or self._search_regex( + r'video-asset-id="((?:\d|\w|-)+)', webpage, 'ooyala id', + default=None, fatal=False)) - ooyala_id = url_obj.group('id') return self.url_result( 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id ) From c950d57de684b3ab6613960565ccf4189221b9f1 Mon Sep 17 00:00:00 2001 From: fourjr <28086837+fourjr@users.noreply.github.com> Date: Sun, 22 Nov 2020 18:04:45 +0800 Subject: [PATCH 5/5] [channelnewsasia] follow conventions --- youtube_dl/extractor/channelnewsasia.py | 45 +++++++++++-------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/channelnewsasia.py b/youtube_dl/extractor/channelnewsasia.py index 7a59192d7..511fee2d5 100644 --- a/youtube_dl/extractor/channelnewsasia.py +++ b/youtube_dl/extractor/channelnewsasia.py @@ -1,40 +1,36 @@ # coding: utf-8 from __future__ import unicode_literals -import re from .common import InfoExtractor class ChannelNewsAsiaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?channelnewsasia\.com/(?:(?:-|\w|\d)+)/(?:(?:-|\w|\d)+)/(?P<id>(?:-|\w|\d)+)' - _TESTS = [ - { - 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech/wizards-of-tech-body-13515106', - 'md5': 'ed9ed143052f0da3ee8a8fa59ba16870', - 'info_dict': { - 'id': 'w0ZWRzajE6qDPXDb7DSeaOCJ3bJ3GDqC', - 'ext': 'mp4', - 'title': 'Wizards Of Tech_2020_0_1_Body', - 'description': 'md5:b3882dd00e329e623a179465de9f5478', - }, + _TESTS = [{ + 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech/wizards-of-tech-body-13515106', + 'md5': 'ed9ed143052f0da3ee8a8fa59ba16870', + 'info_dict': { + 'id': 'w0ZWRzajE6qDPXDb7DSeaOCJ3bJ3GDqC', + 'ext': 'mp4', + 'title': 'Wizards Of Tech_2020_0_1_Body', + 'description': 'md5:b3882dd00e329e623a179465de9f5478', }, - { - 'url': 
'https://www.channelnewsasia.com/news/asia/removing-the-negative-influences-of-religion-in-tibet-video-13604084', - 'md5': 'ed846cfca037823fa6d3d0d7af8a4e8f', - 'info_dict': { - 'id': 'ljZjd0ajE6NNMhVJ3Gb-QfL1l0p-qW6-', - 'ext': 'mp4', - 'title': "Removing the 'negative influences of religion' in Tibet | Video", - 'description': 'md5:777989926133319de6f6501372175fbf', - }, - } - ] + }, { + 'url': 'https://www.channelnewsasia.com/news/asia/removing-the-negative-influences-of-religion-in-tibet-video-13604084', + 'md5': 'ed846cfca037823fa6d3d0d7af8a4e8f', + 'info_dict': { + 'id': 'ljZjd0ajE6NNMhVJ3Gb-QfL1l0p-qW6-', + 'ext': 'mp4', + 'title': "Removing the 'negative influences of religion' in Tibet | Video", + 'description': 'md5:777989926133319de6f6501372175fbf', + }, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - + ooyala_id = ( self._search_regex( r'id="ooyala-\d+-((?:\d|\w|-)+)--\d+', webpage, 'ooyala id', @@ -44,5 +40,4 @@ class ChannelNewsAsiaIE(InfoExtractor): default=None, fatal=False)) return self.url_result( - 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id - ) + 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id)