From ec6e9ed8cd11179f7067f7ee64b31523e289eef9 Mon Sep 17 00:00:00 2001 From: Patrick Menschel Date: Fri, 19 May 2023 20:31:52 +0200 Subject: [PATCH 1/8] [PikSel] Feat: Add variable host support _call_api function was using hardcoded hosts --- youtube_dl/extractor/piksel.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index ecf56ff8f..aad4e7f7d 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -12,6 +12,7 @@ from ..utils import ( parse_iso8601, try_get, unescapeHTML, + urljoin ) @@ -73,10 +74,9 @@ class PikselIE(InfoExtractor): if mobj: return mobj.group('url') - def _call_api(self, app_token, resource, display_id, query, fatal=True): - response = (self._download_json( - 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token), - display_id, query=query, fatal=fatal) or {}).get('response') + def _call_api(self, app_token, resource, display_id, query, host="https://player.piksel.com", fatal=True): + url = urljoin(base=host, path='/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token)) + response = (self._download_json(url, display_id, query=query, fatal=fatal) or {}).get('response') failure = try_get(response, lambda x: x['failure']['reason']) if failure: if fatal: @@ -93,7 +93,7 @@ class PikselIE(InfoExtractor): ], webpage, 'app token') query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} program = self._call_api( - app_token, 'program', display_id, query)['WsProgramResponse']['program'] + app_token, 'program', display_id, query, url)['WsProgramResponse']['program'] video_id = program['uuid'] video_data = program['asset'] title = video_data['title'] @@ -143,7 +143,7 @@ class PikselIE(InfoExtractor): process_asset_files(try_get(self._call_api( app_token, 'asset_file', display_id, { 'assetid': asset_id, - }, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) + }, url, 
False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) m3u8_url = dict_get(video_data, [ 'm3u8iPadURL', From 649cb61ee32fa24c4139a2f5b070900642f163bb Mon Sep 17 00:00:00 2001 From: Patrick Menschel Date: Fri, 19 May 2023 20:48:45 +0200 Subject: [PATCH 2/8] [nhk] Fix: Update to self-hosted URLs NHK now hosts its API instead of redirecting to the Piksel host. Added a test. Fixed illegal usage of child class property in base class. Minor style issues fixed by PyCharm's file reformat. --- youtube_dl/extractor/nhk.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index f43d91cd5..fa9a6dde9 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -11,6 +11,7 @@ class NhkBaseIE(InfoExtractor): _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P[a-z]{2})/ondemand' _TYPE_REGEX = r'/(?Pvideo|audio)/' + _VALID_URL = r"" def _call_api(self, m_id, lang, is_video, is_episode, is_clip): return self._download_json( @@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor): def _extract_episode_info(self, url, episode=None): fetch_episode = episode is None - lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups() + lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() if len(episode_id) == 7: episode_id = episode_id[:4] + '-' + episode_id[4:] @@ -63,7 +64,7 @@ class NhkBaseIE(InfoExtractor): info.update({ '_type': 'url_transparent', 'ie_key': 'Piksel', - 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id, + 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, 'id': vod_id, }) else: @@ -90,8 +91,22 @@ class NhkVodIE(NhkBaseIE): # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2061601/', + 'info_dict': { + 'id': 'yd8322ch', + 'ext': 'mp4', + 'description': 'NHK WORLD-JAPAN presents a sumo highlights program for fans around the globe. Today the' + ' show features all top-division bouts from May 14, Day 1 of the Grand Sumo Tournament in' + ' Tokyo.', + 'title': 'GRAND SUMO Highlights - [Recap] May Tournament Day 1 (Opening Day)', + 'upload_date': '20230514', + 'timestamp': 1684083791, + }, + + }, { # video clip 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', + 'only_matching': True, 'md5': '7a90abcfe610ec22a6bfe15bd46b30ca', 'info_dict': { 'id': 'a95j5iza', @@ -146,7 +161,8 @@ class NhkVodIE(NhkBaseIE): class NhkVodProgramIE(NhkBaseIE): - _VALID_URL = r'%s/program%s(?P[0-9a-z]+)(?:.+?\btype=(?Pclip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) + _VALID_URL = r'%s/program%s(?P[0-9a-z]+)(?:.+?\btype=(?Pclip|(?:radio|tv)Episode))?' % ( + NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) _TESTS = [{ # video program episodes 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway', From 34e6b4968baacf43f7ffee62f92ce48657837afc Mon Sep 17 00:00:00 2001 From: Patrick Menschel Date: Sat, 20 May 2023 11:54:44 +0200 Subject: [PATCH 3/8] [nhk] Fix: NhkVodProgramIE Playlists - Use correct class inheritance for video / playlist scheme - Move functions according to video / playlist scheme --- youtube_dl/extractor/nhk.py | 135 +++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index fa9a6dde9..572fe3118 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -11,7 +11,6 @@ class NhkBaseIE(InfoExtractor): _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P[a-z]{2})/ondemand' _TYPE_REGEX = 
r'/(?Pvideo|audio)/' - _VALID_URL = r"" def _call_api(self, m_id, lang, is_video, is_episode, is_clip): return self._download_json( @@ -22,68 +21,6 @@ class NhkBaseIE(InfoExtractor): m_id, lang, '/all' if is_video else ''), m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or [] - def _extract_episode_info(self, url, episode=None): - fetch_episode = episode is None - lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() - if len(episode_id) == 7: - episode_id = episode_id[:4] + '-' + episode_id[4:] - - is_video = m_type == 'video' - if fetch_episode: - episode = self._call_api( - episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] - title = episode.get('sub_title_clean') or episode['sub_title'] - - def get_clean_field(key): - return episode.get(key + '_clean') or episode.get(key) - - series = get_clean_field('title') - - thumbnails = [] - for s, w, h in [('', 640, 360), ('_l', 1280, 720)]: - img_path = episode.get('image' + s) - if not img_path: - continue - thumbnails.append({ - 'id': '%dp' % h, - 'height': h, - 'width': w, - 'url': 'https://www3.nhk.or.jp' + img_path, - }) - - info = { - 'id': episode_id + '-' + lang, - 'title': '%s - %s' % (series, title) if series and title else title, - 'description': get_clean_field('description'), - 'thumbnails': thumbnails, - 'series': series, - 'episode': title, - } - if is_video: - vod_id = episode['vod_id'] - info.update({ - '_type': 'url_transparent', - 'ie_key': 'Piksel', - 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, - 'id': vod_id, - }) - else: - if fetch_episode: - audio_path = episode['audio']['audio'] - info['formats'] = self._extract_m3u8_formats( - 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, - episode_id, 'm4a', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - for f in info['formats']: - f['language'] = lang - else: - info.update({ - '_type': 'url_transparent', - 'ie_key': NhkVodIE.ie_key(), - 'url': 
url, - }) - return info - class NhkVodIE(NhkBaseIE): # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg @@ -156,15 +93,85 @@ class NhkVodIE(NhkBaseIE): } }] + def _extract_episode_info(self, url, episode=None): + print(url) + fetch_episode = episode is None + lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups() + if len(episode_id) == 7: + episode_id = episode_id[:4] + '-' + episode_id[4:] + + is_video = m_type == 'video' + if fetch_episode: + episode = self._call_api( + episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] + title = episode.get('sub_title_clean') or episode['sub_title'] + + def get_clean_field(key): + return episode.get(key + '_clean') or episode.get(key) + + series = get_clean_field('title') + + thumbnails = [] + for s, w, h in [('', 640, 360), ('_l', 1280, 720)]: + img_path = episode.get('image' + s) + if not img_path: + continue + thumbnails.append({ + 'id': '%dp' % h, + 'height': h, + 'width': w, + 'url': 'https://www3.nhk.or.jp' + img_path, + }) + + info = { + 'id': episode_id + '-' + lang, + 'title': '%s - %s' % (series, title) if series and title else title, + 'description': get_clean_field('description'), + 'thumbnails': thumbnails, + 'series': series, + 'episode': title, + } + if is_video: + vod_id = episode['vod_id'] + info.update({ + '_type': 'url_transparent', + 'ie_key': 'Piksel', + 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, + 'id': vod_id, + }) + else: + if fetch_episode: + audio_path = episode['audio']['audio'] + info['formats'] = self._extract_m3u8_formats( + 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, + episode_id, 'm4a', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) + for f in info['formats']: + f['language'] = lang + else: + info.update({ + '_type': 'url_transparent', + 'ie_key': NhkVodIE.ie_key(), + 'url': url, + }) + return info + def _real_extract(self, url): return self._extract_episode_info(url) 
-class NhkVodProgramIE(NhkBaseIE): +class NhkVodProgramIE(NhkVodIE): _VALID_URL = r'%s/program%s(?P[0-9a-z]+)(?:.+?\btype=(?Pclip|(?:radio|tv)Episode))?' % ( NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) _TESTS = [{ # video program episodes + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo', + 'info_dict': { + 'id': 'sumo', + 'title': 'GRAND SUMO Highlights', + }, + 'playlist_mincount': 1, + }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway', 'info_dict': { 'id': 'japanrailway', From 2fa52b7cb197601d81d60f2234c52e91a1a97218 Mon Sep 17 00:00:00 2001 From: Patrick Menschel Date: Sat, 20 May 2023 12:07:04 +0200 Subject: [PATCH 4/8] [nhk] Remove debug prints --- youtube_dl/extractor/nhk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 572fe3118..8ebd1969f 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -94,7 +94,6 @@ class NhkVodIE(NhkBaseIE): }] def _extract_episode_info(self, url, episode=None): - print(url) fetch_episode = episode is None lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups() if len(episode_id) == 7: From 322e222ce39c1bc05846c24aea01c47731768e25 Mon Sep 17 00:00:00 2001 From: menschel Date: Fri, 26 May 2023 17:13:18 +0200 Subject: [PATCH 5/8] Apply suggestions from code review Co-authored-by: dirkf --- youtube_dl/extractor/nhk.py | 1 - youtube_dl/extractor/piksel.py | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 8ebd1969f..9bff8eab5 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -39,7 +39,6 @@ class NhkVodIE(NhkBaseIE): 'upload_date': '20230514', 'timestamp': 1684083791, }, - }, { # video clip 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index 
aad4e7f7d..68611953f 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -12,7 +12,7 @@ from ..utils import ( parse_iso8601, try_get, unescapeHTML, - urljoin + urljoin, ) @@ -74,9 +74,10 @@ class PikselIE(InfoExtractor): if mobj: return mobj.group('url') - def _call_api(self, app_token, resource, display_id, query, host="https://player.piksel.com", fatal=True): - url = urljoin(base=host, path='/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token)) - response = (self._download_json(url, display_id, query=query, fatal=fatal) or {}).get('response') + def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True): + url = urljoin(host, '/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token)) + response = self._download_json(url, display_id, query=query, fatal=fatal) + response = traverse_obj(response, 'response', expected_type=dict) or {} failure = try_get(response, lambda x: x['failure']['reason']) if failure: if fatal: From 3f12577bc38f6c998ce5b74ebc918fb66b2bd3f5 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 26 May 2023 23:03:42 +0100 Subject: [PATCH 6/8] Update youtube_dl/extractor/piksel.py --- youtube_dl/extractor/piksel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index 68611953f..cde973ac2 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_iso8601, + traverse_obj, try_get, unescapeHTML, urljoin, From 42e8f49d9e73ed316840f050b55ddd5a95b2c318 Mon Sep 17 00:00:00 2001 From: menschel Date: Sat, 27 May 2023 10:09:44 +0200 Subject: [PATCH 7/8] Update youtube_dl/extractor/piksel.py Co-authored-by: dirkf --- youtube_dl/extractor/piksel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index cde973ac2..6bf16e19e 100644 --- 
a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, parse_iso8601, traverse_obj, + traverse_obj, try_get, unescapeHTML, urljoin, From 8c6aa13dbd1726cc662c13f58c16dd8193d89f4f Mon Sep 17 00:00:00 2001 From: Patrick Menschel Date: Mon, 29 May 2023 11:53:22 +0200 Subject: [PATCH 8/8] [nhk] Remove duplicate import after review --- youtube_dl/extractor/piksel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index 6bf16e19e..cde973ac2 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -11,7 +11,6 @@ from ..utils import ( int_or_none, parse_iso8601, traverse_obj, - traverse_obj, try_get, unescapeHTML, urljoin,