From 7907e765bf39216ebd60831116f1c6db535a53de Mon Sep 17 00:00:00 2001
From: schn0sch
Date: Sun, 27 Dec 2020 21:48:24 +0100
Subject: [PATCH] [yourporn] added support for posts with multiple videos

closes #27554
---
 youtube_dl/extractor/yourporn.py | 95 +++++++++++++++++++++-----------
 1 file changed, 64 insertions(+), 31 deletions(-)

diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py
index 98347491e..b552efe2a 100644
--- a/youtube_dl/extractor/yourporn.py
+++ b/youtube_dl/extractor/yourporn.py
@@ -30,38 +30,71 @@ class YourPornIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        parts = self._parse_json(
-            self._search_regex(
-                r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
-                group='data'),
-            video_id)[video_id].split('/')
-
-        num = 0
-        for c in parts[6] + parts[7]:
-            if c.isnumeric():
-                num += int(c)
-        parts[5] = compat_str(int(parts[5]) - num)
-        parts[1] += '8'
-        video_url = urljoin(url, '/'.join(parts))
-
-        title = (self._search_regex(
-            r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
-            default=None) or self._og_search_description(webpage)).strip()
-        thumbnail = self._og_search_thumbnail(webpage)
-        duration = parse_duration(self._search_regex(
+        get_duration = lambda webpage: parse_duration(self._search_regex(
             r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
             default=None))
 
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'age_limit': 18,
-            'ext': 'mp4',
+        # Only for posts containing a single video is the post_id equal to
+        # the video_id. If there are multiple videos, a separate post exists
+        # for each video_id, and that page is used to fetch the video title.
+        post_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, post_id)
+
+        videos = self._parse_json(
+            self._search_regex(
+                r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
+                group='data'),
+            post_id)
+
+        for video_id in videos:
+            parts = videos[video_id].split('/')
+            num = 0
+            for c in parts[6] + parts[7]:
+                if c.isnumeric():
+                    num += int(c)
+            parts[5] = compat_str(int(parts[5]) - num)
+            parts[1] += '8'
+            videos[video_id] = urljoin(url, '/'.join(parts))
+
+        # If there is only one video in the post (the most common case), the
+        # video_id is equal to post_id and we avoid re-fetching the page to
+        # obtain the metadata.
+        # This may fail, but any missing values are fetched in the next step.
+        titles = {
+            post_id: self._og_search_description(webpage, default=None)}
+        thumbnails = {
+            post_id: self._og_search_thumbnail(webpage, default=None)
+        }
+        durations = {
+            post_id: get_duration(webpage)
+        }
+
+        # obtain missing metadata for all videos in the post
+        for video_id in videos:
+            if not titles.get(video_id) or not thumbnails.get(video_id) or video_id not in durations:
+                webpage = self._download_webpage('https://sxyprn.com/post/%s.html' % video_id, video_id)
+                if not titles.get(video_id):
+                    titles[video_id] = self._og_search_description(webpage)
+                if not thumbnails.get(video_id):
+                    thumbnails[video_id] = self._og_search_thumbnail(webpage)
+                if video_id not in durations:
+                    durations[video_id] = get_duration(webpage)
+
+        entries = []
+        for video_id in videos:
+            entries.append({
+                'id': video_id,
+                'url': videos[video_id],
+                'title': titles[video_id],
+                'thumbnail': thumbnails[video_id],
+                'duration': durations[video_id],
+                'age_limit': 18,
+                'ext': 'mp4',
+            })
+
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            return self.playlist_result(entries, post_id, titles[post_id])
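
Note (not part of the patch): a small standalone sketch of the URL de-obfuscation that the per-video loop above applies to each data-vnfo entry. The post URL and path below are made up; only the arithmetic (digit sum of parts[6] + parts[7] subtracted from parts[5], '8' appended to parts[1]) mirrors the extractor code.

from urllib.parse import urljoin  # Python 3 only; the extractor itself uses compat helpers


def deobfuscate(page_url, vnfo_path):
    # Same arithmetic as the extractor: sum the digits found in
    # parts[6] + parts[7], subtract that sum from parts[5], append '8'
    # to parts[1], then resolve the rebuilt path against the post URL.
    parts = vnfo_path.split('/')
    num = sum(int(c) for c in parts[6] + parts[7] if c.isnumeric())
    parts[5] = str(int(parts[5]) - num)
    parts[1] += '8'
    return urljoin(page_url, '/'.join(parts))


# Hypothetical values; real ones come from the post's data-vnfo JSON.
print(deobfuscate(
    'https://sxyprn.com/post/0123456789abc.html',
    '/cdn7/k/ab12/cd34/1000/56ef/78gh/video.mp4'))
# -> https://sxyprn.com/cdn78/k/ab12/cd34/974/56ef/78gh/video.mp4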