From 34e6b4968baacf43f7ffee62f92ce48657837afc Mon Sep 17 00:00:00 2001 From: Patrick Menschel Date: Sat, 20 May 2023 11:54:44 +0200 Subject: [PATCH] [nhk] Fix: NhkVodProgramIE Playlists - Use correct class inheritance for video / playlist scheme - Move functions according to video / playlist scheme --- youtube_dl/extractor/nhk.py | 135 +++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index fa9a6dde9..572fe3118 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -11,7 +11,6 @@ class NhkBaseIE(InfoExtractor): _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P[a-z]{2})/ondemand' _TYPE_REGEX = r'/(?Pvideo|audio)/' - _VALID_URL = r"" def _call_api(self, m_id, lang, is_video, is_episode, is_clip): return self._download_json( @@ -22,68 +21,6 @@ class NhkBaseIE(InfoExtractor): m_id, lang, '/all' if is_video else ''), m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or [] - def _extract_episode_info(self, url, episode=None): - fetch_episode = episode is None - lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() - if len(episode_id) == 7: - episode_id = episode_id[:4] + '-' + episode_id[4:] - - is_video = m_type == 'video' - if fetch_episode: - episode = self._call_api( - episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] - title = episode.get('sub_title_clean') or episode['sub_title'] - - def get_clean_field(key): - return episode.get(key + '_clean') or episode.get(key) - - series = get_clean_field('title') - - thumbnails = [] - for s, w, h in [('', 640, 360), ('_l', 1280, 720)]: - img_path = episode.get('image' + s) - if not img_path: - continue - thumbnails.append({ - 'id': '%dp' % h, - 'height': h, - 'width': w, - 'url': 'https://www3.nhk.or.jp' + img_path, - }) - - info = { - 'id': episode_id + '-' + lang, - 'title': '%s - %s' % (series, title) if series and title else title, - 'description': get_clean_field('description'), - 'thumbnails': thumbnails, - 'series': series, - 'episode': title, - } - if is_video: - vod_id = episode['vod_id'] - info.update({ - '_type': 'url_transparent', - 'ie_key': 'Piksel', - 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, - 'id': vod_id, - }) - else: - if fetch_episode: - audio_path = episode['audio']['audio'] - info['formats'] = self._extract_m3u8_formats( - 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, - episode_id, 'm4a', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - for f in info['formats']: - f['language'] = lang - else: - info.update({ - '_type': 'url_transparent', - 'ie_key': NhkVodIE.ie_key(), - 'url': url, - }) - return info - class NhkVodIE(NhkBaseIE): # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg @@ -156,15 +93,85 @@ class NhkVodIE(NhkBaseIE): } }] + def _extract_episode_info(self, url, episode=None): + print(url) + fetch_episode = episode is None + lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups() + if len(episode_id) == 7: + episode_id = episode_id[:4] + '-' + episode_id[4:] + + is_video = m_type == 'video' + if fetch_episode: + episode = self._call_api( + episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] + title = episode.get('sub_title_clean') or episode['sub_title'] + + def get_clean_field(key): + return episode.get(key + '_clean') or episode.get(key) + + series = get_clean_field('title') + + thumbnails = [] + for s, w, h in [('', 640, 360), ('_l', 1280, 720)]: + img_path = episode.get('image' + s) + if not img_path: + continue + thumbnails.append({ + 'id': '%dp' % h, + 'height': h, + 'width': w, + 'url': 'https://www3.nhk.or.jp' + img_path, + }) + + info = { + 'id': episode_id + '-' + lang, + 'title': '%s - %s' % (series, title) if series and title else title, + 'description': get_clean_field('description'), + 'thumbnails': thumbnails, + 'series': series, + 'episode': title, + } + if is_video: + vod_id = episode['vod_id'] + info.update({ + '_type': 'url_transparent', + 'ie_key': 'Piksel', + 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, + 'id': vod_id, + }) + else: + if fetch_episode: + audio_path = episode['audio']['audio'] + info['formats'] = self._extract_m3u8_formats( + 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, + episode_id, 'm4a', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) + for f in info['formats']: + f['language'] = lang + else: + info.update({ + '_type': 'url_transparent', + 'ie_key': NhkVodIE.ie_key(), + 'url': url, + }) + return info + def _real_extract(self, url): return self._extract_episode_info(url) -class NhkVodProgramIE(NhkBaseIE): +class NhkVodProgramIE(NhkVodIE): _VALID_URL = r'%s/program%s(?P[0-9a-z]+)(?:.+?\btype=(?Pclip|(?:radio|tv)Episode))?' % ( NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) _TESTS = [{ # video program episodes + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo', + 'info_dict': { + 'id': 'sumo', + 'title': 'GRAND SUMO Highlights', + }, + 'playlist_mincount': 1, + }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway', 'info_dict': { 'id': 'japanrailway',