From 2b0623b116f3d7f31fbd42870dedc43e327fad12 Mon Sep 17 00:00:00 2001 From: hyeeji Date: Tue, 7 Dec 2021 03:11:01 +0900 Subject: [PATCH 1/5] Test --- youtube_dl/extractor/extractors.py | 2 ++ youtube_dl/extractor/nate.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/nate.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6e8fc3961..bff72460e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -721,6 +721,8 @@ from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicTVIE, ) +from .nate import NateIE + from .naver import NaverIE from .nba import ( NBAWatchEmbedIE, diff --git a/youtube_dl/extractor/nate.py b/youtube_dl/extractor/nate.py new file mode 100644 index 000000000..b75fe2140 --- /dev/null +++ b/youtube_dl/extractor/nate.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor + + +class NateIE(InfoExtractor): + _VALID_URL = r'https?://(?:m\.)?tv\.nate\.com/clip/(?P<id>[0-9]+)' + _API_BASE_TMPL = 'https://tv.nate.com/api/v1/clip/%s' + _TEST = { + 'url': 'https://tv.nate.com/clip/4300566', + #'md5': '02D3CAB3907B60C58043761F8B5BF2B3', + 'info_dict': { + 'id': '4300566', + 'ext': 'mp4', + 'title': '[심쿵엔딩] 이준호x이세영, 서로를 기억하며 끌어안는 두 사람!💕, MBC 211204 방송', + 'thumbnail': r're:^http?://.*\.jpg$', + 'upload_date': '20211204', + 'age_limit' : '15' + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + #video_data = self._download_json( + # 'https://tv.nate.com/api/v1/clip/% s'%video_id, video_id, headers=self.geo_verification_headers()) + video_data = self._download_json(url, 
video_id) + + title = video_data.get('clipTitle') + thumbnail = video_data.get('contentImg') + upload_date = video_data.get('regData') + age_limit = video_data.get('targetAge') + + + # TODO more code goes here, for example ... + + return { + 'id': video_id, + 'title': title, + 'thumbnail' : thumbnail, + 'upload_date' : upload_date[:8], + 'age_limit' : age_limit + # TODO more properties (see youtube_dl/extractor/common.py) + } From a0bb1d8c0d18e18b93afa801b3c1f399b16410ae Mon Sep 17 00:00:00 2001 From: hyeeji Date: Tue, 7 Dec 2021 13:18:15 +0900 Subject: [PATCH 2/5] Feat:add Site --- youtube_dl/extractor/nate.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nate.py b/youtube_dl/extractor/nate.py index b75fe2140..f98005d35 100644 --- a/youtube_dl/extractor/nate.py +++ b/youtube_dl/extractor/nate.py @@ -15,7 +15,7 @@ class NateIE(InfoExtractor): 'title': '[심쿵엔딩] 이준호x이세영, 서로를 기억하며 끌어안는 두 사람!💕, MBC 211204 방송', 'thumbnail': r're:^http?://.*\.jpg$', 'upload_date': '20211204', - 'age_limit' : '15' + 'age_limit' : 15 # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: @@ -30,13 +30,13 @@ class NateIE(InfoExtractor): #video_data = self._download_json( # 'https://tv.nate.com/api/v1/clip/% s'%video_id, video_id, headers=self.geo_verification_headers()) - video_data = self._download_json(url, video_id) + video_data = self._download_json('https://tv.nate.com/api/v1/clip/' + str(video_id), video_id) title = video_data.get('clipTitle') thumbnail = video_data.get('contentImg') - upload_date = video_data.get('regData') + upload_date = video_data.get('regDate') age_limit = video_data.get('targetAge') - + url = video_data['smcUriList'][4] # TODO more code goes here, for example ... 
@@ -45,6 +45,7 @@ class NateIE(InfoExtractor): 'title': title, 'thumbnail' : thumbnail, 'upload_date' : upload_date[:8], - 'age_limit' : age_limit + 'age_limit' : age_limit, + 'url': url # TODO more properties (see youtube_dl/extractor/common.py) } From 8cf64dcbbf2265399630ca0ffd162283719e4d7e Mon Sep 17 00:00:00 2001 From: Han Hyeji <77146030+Hyeeji@users.noreply.github.com> Date: Tue, 7 Dec 2021 19:09:23 +0900 Subject: [PATCH 3/5] [nate] add new site --- youtube_dl/extractor/nate.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nate.py b/youtube_dl/extractor/nate.py index f98005d35..86ac6bc6e 100644 --- a/youtube_dl/extractor/nate.py +++ b/youtube_dl/extractor/nate.py @@ -26,10 +26,7 @@ class NateIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - #video_data = self._download_json( - # 'https://tv.nate.com/api/v1/clip/% s'%video_id, video_id, headers=self.geo_verification_headers()) + video_data = self._download_json('https://tv.nate.com/api/v1/clip/' + str(video_id), video_id) title = video_data.get('clipTitle') @@ -38,7 +35,6 @@ class NateIE(InfoExtractor): age_limit = video_data.get('targetAge') url = video_data['smcUriList'][4] - # TODO more code goes here, for example ... 
return { 'id': video_id, @@ -47,5 +43,4 @@ class NateIE(InfoExtractor): 'upload_date' : upload_date[:8], 'age_limit' : age_limit, 'url': url - # TODO more properties (see youtube_dl/extractor/common.py) } From ba06388c8c17573831452f7dbfb669754ae721d3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 31 Aug 2023 16:34:09 +0100 Subject: [PATCH 4/5] Update from yt-dlp extractor --- youtube_dl/extractor/nate.py | 196 ++++++++++++++++++++++++++++++----- 1 file changed, 172 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/nate.py b/youtube_dl/extractor/nate.py index 86ac6bc6e..182ea95ed 100644 --- a/youtube_dl/extractor/nate.py +++ b/youtube_dl/extractor/nate.py @@ -1,46 +1,194 @@ # coding: utf-8 from __future__ import unicode_literals + +import itertools + from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + merge_dicts, + T, + traverse_obj, + txt_or_none, + unified_strdate, + url_or_none, +) -class NateIE(InfoExtractor): + +class NateBaseIE(InfoExtractor): + _API_BASE = 'https://tv.nate.com/api/v1/' + + def _download_webpage_handle(self, url_or_request, video_id, *args, **kwargs): + fatal = kwargs.get('fatal', True) + kwargs['fatal'] = False + res = super(NateBaseIE, self)._download_webpage_handle( + url_or_request, video_id, *args, **kwargs) + if not res: + if fatal: + raise ExtractorError('Failed to download webpage') + return res + status = res[1].getcode() + if 200 <= status < 400: + new_url = res[1].geturl() + if url_or_request != new_url and '/Error.html' in new_url: + raise ExtractorError( + 'Download redirected to Error.html: expired?', + expected=True) + else: + msg = 'Failed to download webpage: HTTP code %d' % status + if fatal: + raise ExtractorError(msg) + else: + self.report_warning(msg) + return res + + +class NateIE(NateBaseIE): _VALID_URL = r'https?://(?:m\.)?tv\.nate\.com/clip/(?P<id>[0-9]+)' - _API_BASE_TMPL = 'https://tv.nate.com/api/v1/clip/%s' - _TEST = { + _TESTS = [{ + 'url': 
'https://tv.nate.com/clip/1848976', + 'info_dict': { + 'id': '1848976', + 'ext': 'mp4', + 'title': '[결승 오프닝 타이틀] 2018 LCK 서머 스플릿 결승전 kt Rolster VS Griffin', + 'description': 'md5:e1b79a7dcf0d8d586443f11366f50e6f', + 'thumbnail': r're:^http?://.*\.jpg$', + 'upload_date': '20180908', + 'age_limit': 15, + 'duration': 73, + 'uploader': '2018 LCK 서머 스플릿(롤챔스)', + 'channel': '2018 LCK 서머 스플릿(롤챔스)', + 'channel_id': '3606', + 'uploader_id': '3606', + 'tags': 'count:59', + }, + 'skip': 'Redirect to Error.html', + }, { 'url': 'https://tv.nate.com/clip/4300566', - #'md5': '02D3CAB3907B60C58043761F8B5BF2B3', + # 'md5': '02D3CAB3907B60C58043761F8B5BF2B3', 'info_dict': { 'id': '4300566', 'ext': 'mp4', 'title': '[심쿵엔딩] 이준호x이세영, 서로를 기억하며 끌어안는 두 사람!💕, MBC 211204 방송', + 'description': 'md5:edf489c54ea2682c7973154b2089aa0e', 'thumbnail': r're:^http?://.*\.jpg$', 'upload_date': '20211204', - 'age_limit' : 15 - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) - } + 'age_limit': 15, + 'duration': 201, + 'uploader': '옷소매 붉은 끝동', + 'channel': '옷소매 붉은 끝동', + 'channel_id': '27987', + 'uploader_id': '27987', + 'tags': 'count:20', + }, + 'params': {'skip_download': True}, + }, { + 'url': 'https://tv.nate.com/clip/4764792', + 'info_dict': { + 'id': '4764792', + 'ext': 'mp4', + 'title': '흥을 돋우는 가야금 연주와 트롯의 만남⬈ ‘열두줄’♪ TV CHOSUN 230625 방송', + 'description': 'md5:85734d3f9daebe4aa4f20cc73bdcc90c', + 'upload_date': '20230625', + 'uploader_id': '29116', + 'uploader': '쇼퀸', + 'age_limit': 15, + 'thumbnail': r're:^http?://.*\.jpg$', + 'duration': 182, + 'channel': '쇼퀸', + 'channel_id': '29116', + 'tags': 'count:25', + }, + 'params': {'skip_download': True}, + }] + + _QUALITY = { + '36': 2160, + '35': 1080, + '34': 720, + '33': 480, + '32': 360, + '31': 270, } def _real_extract(self, url): video_id = self._match_id(url) - - video_data = 
self._download_json('https://tv.nate.com/api/v1/clip/' + str(video_id), video_id) - title = video_data.get('clipTitle') - thumbnail = video_data.get('contentImg') - upload_date = video_data.get('regDate') - age_limit = video_data.get('targetAge') - url = video_data['smcUriList'][4] + video_data, urlh = self._download_json_handle( + '{0}clip/{1}'.format(self._API_BASE, video_id), video_id, + fatal=False) + if not video_data: + raise ExtractorError('Empty programme JSON') + title = video_data['clipTitle'] + formats = [] + for f_url in traverse_obj(video_data, ('smcUriList', Ellipsis, T(url_or_none))): + fmt_id = f_url[-2:] + formats.append({ + 'format_id': fmt_id, + 'url': f_url, + 'height': self._QUALITY.get(fmt_id), + 'quality': int_or_none(fmt_id), + }) + self._sort_formats(formats) - return { + info = traverse_obj(video_data, { + 'uploader': ('programTitle', T(txt_or_none)), + 'uploader_id': ('programSeq', T(txt_or_none)), + }) + for up, ch in (('uploader', 'channel'), ('uploader_id', 'channel_id')): + info[ch] = info.get(up) + + return merge_dicts({ 'id': video_id, 'title': title, - 'thumbnail' : thumbnail, - 'upload_date' : upload_date[:8], - 'age_limit' : age_limit, - 'url': url - } + 'formats': formats, + }, info, traverse_obj(video_data, { + 'description': ('synopsis', T(txt_or_none)), + 'thumbnail': ('contentImg', T(url_or_none)), + 'upload_date': (('broadDate', 'regDate'), T(unified_strdate)), + 'age_limit': ('targetAge', T(int_or_none)), + 'duration': ('playTime', T(int_or_none)), + 'tags': ('hashTag', T(lambda s: s.split(',') or None)), + }, get_all=False)) + + +class NateProgramIE(NateBaseIE): + _VALID_URL = r'https?://tv\.nate\.com/program/clips/(?P<id>[0-9]+)' + + _TESTS = [{ + 'url': 'https://tv.nate.com/program/clips/27987', + 'playlist_mincount': 191, + 'info_dict': { + 'id': '27987', + }, + }, { + 'url': 'https://tv.nate.com/program/clips/3606', + 'playlist_mincount': 15, + 'info_dict': { + 'id': '3606', + }, + 'skip': 'Redirect to Error.html', + }] + 
+ def _entries(self, pl_id): + for page_num in itertools.count(1): + program_data, urlh = self._download_json_handle( + '{0}program/{1}/clip/ranking'.format(self._API_BASE, pl_id), + pl_id, query={'size': 20, 'page': page_num}, + note='Downloading page {0}'.format(page_num), fatal=False) + + empty = True + for clip_id in traverse_obj(program_data, ('content', Ellipsis, 'clipSeq', T(txt_or_none))): + yield self.url_result( + 'https://tv.nate.com/clip/%s' % clip_id, + ie=NateIE.ie_key(), video_id=clip_id) + empty = False + if traverse_obj(program_data, 'last') or (program_data and empty): + break + + def _real_extract(self, url): + pl_id = self._match_id(url) + return self.playlist_result(self._entries(pl_id), playlist_id=pl_id) From 1e4ea76441dd4a3d28cf8c5140ace7ae825b161a Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 31 Aug 2023 16:37:20 +0100 Subject: [PATCH 5/5] Include NateProgramIE --- youtube_dl/extractor/extractors.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bff72460e..4abbfb268 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -721,8 +721,10 @@ from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicTVIE, ) -from .nate import NateIE - +from .nate import ( + NateIE, + NateProgramIE, +) from .naver import NaverIE from .nba import ( NBAWatchEmbedIE,