diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index a1ca791ca..0245ce2fa 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -1,33 +1,49 @@ +# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( determine_ext, ExtractorError, int_or_none, + join_nonempty, merge_dicts, str_to_int, + T, + traverse_obj, unified_strdate, url_or_none, + urljoin, ) class RedTubeIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)' + _EMBED_REGEX = [r']+?src=["\'](?P(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)'] _TESTS = [{ - 'url': 'http://www.redtube.com/66418', - 'md5': 'fc08071233725f26b8f014dba9590005', + 'url': 'https://www.redtube.com/38864951', + 'md5': 'd7de9cb32e8adb3f6379f1a30f655fae', 'info_dict': { - 'id': '66418', + 'id': '38864951', 'ext': 'mp4', - 'title': 'Sucked on a toilet', - 'upload_date': '20110811', - 'duration': 596, + 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu', + 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: LeoLulu', + 'upload_date': '20210111', + 'timestamp': 1610343109, + 'duration': 646, 'view_count': int, 'age_limit': 18, - } + 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg', + }, + 'expected_warnings': [ + 'Failed to download m3u8 information: HTTP Error 404', + ], + 'params': { + 'format': '[format_id !^= hls]', + }, }, { 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', 'only_matching': True, @@ -36,11 +52,11 @@ class RedTubeIE(InfoExtractor): 'only_matching': True, }] - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+?src=["\'](?P(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)', - webpage) + @classmethod + def _extract_urls(cls, webpage): + for embed_re in cls._EMBED_REGEX: + for from_ in re.findall(embed_re, webpage): + yield from_ def _real_extract(self, url): video_id = self._match_id(url) @@ -71,40 +87,53 @@ class RedTubeIE(InfoExtractor): self._search_regex( r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), video_id, fatal=False) - if sources and isinstance(sources, dict): - for format_id, format_url in sources.items(): - if format_url: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'height': int_or_none(format_id), - }) - medias = self._parse_json( - self._search_regex( - r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage, - 'media definitions', default='{}'), - video_id, fatal=False) - if medias and isinstance(medias, list): - for media in medias: - format_url = url_or_none(media.get('videoUrl')) + + def full_url(u): + return urljoin(url, u) + + for fmt in traverse_obj(sources, (T(dict.items), { + 'url': (1, T(full_url)), + 'format_id': (2, T(compat_str)), + 'height': (2, T(int_or_none)), })): + if 'url' in fmt: + formats.append(fmt) + + medias = self._search_regex( + r'''mediaDefinitions?["']?\s*:\s*(\[[\s\S]+?}\s*\])''', webpage, + 'media definitions', default='{}') + medias = self._parse_json(medias, video_id, fatal=False) + for fmt in traverse_obj(medias, (Ellipsis, T(dict))): + format_url = full_url(fmt.get('videoUrl')) + if not format_url: + continue + more_media = None + if fmt['format'] == 'hls' or (fmt['format'] == 'mp4' and not fmt.get('quality')): + more_media = self._download_json(format_url, video_id, fatal=False) + if more_media is None: + more_media = [fmt] + for fmt in traverse_obj(more_media, (Ellipsis, { + 'url': ('videoUrl', T(full_url)), + 'ext': ('format', T(compat_str)), + 'format_id': ('quality', T(compat_str)), })): + format_url = fmt.get('url') if not format_url: continue - if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8': + if fmt.get('ext') == 'hls' or determine_ext(format_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) continue - format_id = media.get('quality') - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'height': int_or_none(format_id), - }) + fmt['height'] = int_or_none(fmt.get('format_id')) + fmt['format_id'] = join_nonempty('ext', 'format_id', from_dict=fmt) + formats.append(fmt) if not formats: - video_url = self._html_search_regex( - r'', webpage, 'video URL') - formats.append({'url': video_url}) + video_url = url_or_none(self._html_search_regex( + r'', webpage, 'video URL')) + if video_url: + formats.append({'url': video_url}) + + self._check_formats(formats, video_id) self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage)