[openload] remove OpenLoad-related extractors (closes #11999) (closes #15406)

2025-04-25 03:38:20 +00:00 · 2019-11-26 23:57:37 +01:00 · 2019-11-26 23:57:37 +01:00 · 6471d0d3b8
commit 6471d0d3b8
parent 5ef62fc4ce
4 changed files with 0 additions and 412 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -796,10 +796,6 @@ from .ooyala import (
    OoyalaIE,
    OoyalaExternalIE,
 )
 from .openload import (
    OpenloadIE,
    VerystreamIE,
 )
 from .ora import OraTVIE
 from .orf import (
    ORFTVthekIE,
@ -1060,7 +1056,6 @@ from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamable import StreamableIE
 from .streamango import StreamangoIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -88,10 +88,6 @@ from .piksel import PikselIE
 from .videa import VideaIE
 from .twentymin import TwentyMinutenIE
 from .ustream import UstreamIE
 from .openload import (
    OpenloadIE,
    VerystreamIE,
 )
 from .videopress import VideoPressIE
 from .rutube import RutubeIE
 from .limelight import LimelightBaseIE
@ -3048,18 +3044,6 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
        # Look for Openload embeds
        openload_urls = OpenloadIE._extract_urls(webpage)
        if openload_urls:
            return self.playlist_from_matches(
                openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
        # Look for Verystream embeds
        verystream_urls = VerystreamIE._extract_urls(webpage)
        if verystream_urls:
            return self.playlist_from_matches(
                verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
        # Look for VideoPress embeds
        videopress_urls = VideoPressIE._extract_urls(webpage)
        if videopress_urls:
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@ -3,21 +3,17 @@ from __future__ import unicode_literals
 import json
 import os
 import re
 import subprocess
 import tempfile
 from .common import InfoExtractor
 from ..compat import (
    compat_urlparse,
    compat_kwargs,
 )
 from ..utils import (
    check_executable,
    determine_ext,
    encodeArgument,
    ExtractorError,
    get_element_by_id,
    get_exe_version,
    is_outdated_version,
    std_headers,
@ -240,262 +236,3 @@ class PhantomJSwrapper(object):
        self._load_cookies()
        return (html, encodeArgument(out))
 class OpenloadIE(InfoExtractor):
    _DOMAINS = r'''
                    (?:
                        openload\.(?:co|io|link|pw)|
                        oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)|
                        oladblock\.(?:services|xyz|me)|openloed\.co
                    )
                '''
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
                            %s
                        )/
                        (?:f|embed)/
                        (?P<id>[a-zA-Z0-9-_]+)
                    ''' % _DOMAINS
    _EMBED_WORD = 'embed'
    _STREAM_WORD = 'f'
    _REDIR_WORD = 'stream'
    _URL_IDS = ('streamurl', 'streamuri', 'streamurj')
    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
        'info_dict': {
            'id': 'kUEfGclsU9o',
            'ext': 'mp4',
            'title': 'skyrim_no-audio_1080.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://openload.co/embed/rjC09fkPLYs',
        'info_dict': {
            'id': 'rjC09fkPLYs',
            'ext': 'mp4',
            'title': 'movie.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': {
                'en': [{
                    'ext': 'vtt',
                }],
            },
        },
        'params': {
            'skip_download': True,  # test subtitles only
        },
    }, {
        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
        'only_matching': True,
    }, {
        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
        'only_matching': True,
    }, {
        'url': 'https://openload.co/f/_-ztPaZtMhM/',
        'only_matching': True,
    }, {
        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
        # for title and ext
        'url': 'https://openload.co/embed/Sxz5sADo82g/',
        'only_matching': True,
    }, {
        # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
        # via https://openload.co/f/e-Ixz9ZR5L0/
        'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
        'only_matching': True,
    }, {
        'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
        'only_matching': True,
    }, {
        'url': 'http://www.openload.link/f/KnG-kKZdcfY',
        'only_matching': True,
    }, {
        'url': 'https://oload.stream/f/KnG-kKZdcfY',
        'only_matching': True,
    }, {
        'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
        'only_matching': True,
    }, {
        'url': 'https://oload.win/f/kUEfGclsU9o',
        'only_matching': True,
    }, {
        'url': 'https://oload.download/f/kUEfGclsU9o',
        'only_matching': True,
    }, {
        'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
        'only_matching': True,
    }, {
        # The title lacks the file extension, but the URL includes it
        'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
        'only_matching': True,
    }, {
        'url': 'https://oload.cc/embed/5NEAbI2BDSk',
        'only_matching': True,
    }, {
        'url': 'https://oload.icu/f/-_i4y_F_Hs8',
        'only_matching': True,
    }, {
        'url': 'https://oload.fun/f/gb6G1H4sHXY',
        'only_matching': True,
    }, {
        'url': 'https://oload.club/f/Nr1L-aZ2dbQ',
        'only_matching': True,
    }, {
        'url': 'https://oload.info/f/5NEAbI2BDSk',
        'only_matching': True,
    }, {
        'url': 'https://openload.pw/f/WyKgK8s94N0',
        'only_matching': True,
    }, {
        'url': 'https://oload.pw/f/WyKgK8s94N0',
        'only_matching': True,
    }, {
        'url': 'https://oload.live/f/-Z58UZ-GR4M',
        'only_matching': True,
    }, {
        'url': 'https://oload.space/f/IY4eZSst3u8/',
        'only_matching': True,
    }, {
        'url': 'https://oload.services/embed/bs1NWj1dCag/',
        'only_matching': True,
    }, {
        'url': 'https://oload.online/f/W8o2UfN1vNY/',
        'only_matching': True,
    }, {
        'url': 'https://oload.monster/f/W8o2UfN1vNY/',
        'only_matching': True,
    }, {
        'url': 'https://oload.press/embed/drTBl1aOTvk/',
        'only_matching': True,
    }, {
        'url': 'https://oload.website/embed/drTBl1aOTvk/',
        'only_matching': True,
    }, {
        'url': 'https://oload.life/embed/oOzZjNPw9Dc/',
        'only_matching': True,
    }, {
        'url': 'https://oload.biz/f/bEk3Gp8ARr4/',
        'only_matching': True,
    }, {
        'url': 'https://oload.best/embed/kkz9JgVZeWc/',
        'only_matching': True,
    }, {
        'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
        'only_matching': True,
    }, {
        'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/',
        'only_matching': True,
    }, {
        'url': 'https://oladblock.me/f/b8NWEgkqNLI/',
        'only_matching': True,
    }, {
        'url': 'https://openloed.co/f/b8NWEgkqNLI/',
        'only_matching': True,
    }, {
        'url': 'https://oload.vip/f/kUEfGclsU9o',
        'only_matching': True,
    }]
    @classmethod
    def _extract_urls(cls, webpage):
        """Return the embed URLs found in <iframe src=...> attributes of webpage.

        The pattern is assembled from cls._DOMAINS and cls._EMBED_WORD, so a
        subclass that overrides those attributes matches its own hosts.  The
        scheme is optional in the pattern, so returned URLs may lack it.
        """
        iframe_src_re = (
            r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
            % (cls._DOMAINS, cls._EMBED_WORD))
        return re.findall(iframe_src_re, webpage)
    def _extract_decrypted_page(self, page_url, webpage, video_id):
        """Return the page HTML after passing it through PhantomJSwrapper.

        The wrapper (required_version '2.0') is handed the already downloaded
        webpage; its get() call yields a pair, of which only the first item
        (the HTML) is kept.
        """
        rendered_html, _ = PhantomJSwrapper(self, required_version='2.0').get(
            page_url, html=webpage, video_id=video_id)
        return rendered_html
    def _real_extract(self, url):
        """Build the info dict for a single hosted video.

        The embed page is requested first and the stream page second; a
        failed download or a "not found" page is only fatal for the second.
        The selected page goes through _extract_decrypted_page, the stream
        token is read from one of the _URL_IDS elements (falling back to a
        set of regexes) and combined with _REDIR_WORD into the media URL.

        Raises ExtractorError when the stream page reports the file as gone.
        """
        matched = re.match(self._VALID_URL, url)
        host, video_id = matched.group('host'), matched.group('id')

        for page_kind in (self._EMBED_WORD, self._STREAM_WORD):
            is_final = page_kind == self._STREAM_WORD
            page_url = 'https://%s/%s/%s/' % (host, page_kind, video_id)
            webpage = self._download_webpage(
                page_url, video_id, 'Downloading %s webpage' % page_kind,
                fatal=is_final)
            if not webpage:
                continue
            removed = 'File not found' in webpage or 'deleted by the owner' in webpage
            if not removed:
                # usable page found, stop probing
                break
            if is_final:
                raise ExtractorError('File not found', expected=True, video_id=video_id)

        webpage = self._extract_decrypted_page(page_url, webpage, video_id)

        # Preferred source of the stream token: a known element id
        for candidate_id in self._URL_IDS:
            decoded_id = get_element_by_id(candidate_id, webpage)
            if decoded_id:
                break
        if not decoded_id:
            # Fallback: recognise the token by its shape anywhere in the markup
            token_patterns = (
                r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
                r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
                r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
                r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
                r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)')
            decoded_id = self._search_regex(token_patterns, webpage, 'stream URL')

        video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)

        # Title sources in order of preference; only the last one is mandatory
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._search_regex(
                r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
                'title', default=None)
        if not title:
            title = self._html_search_meta(
                'description', webpage, 'title', fatal=True)

        media_entries = self._parse_html5_media_entries(page_url, webpage, video_id)
        first_entry = media_entries[0] if media_entries else {}
        subtitles = first_entry.get('subtitles')

        thumbnail = first_entry.get('thumbnail')
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage, default=None)

        # Extension comes from the title when it has one, else from the URL
        ext = determine_ext(title, None)
        if not ext:
            ext = determine_ext(url, 'mp4')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'url': video_url,
            'ext': ext,
            'subtitles': subtitles,
        }
 class VerystreamIE(OpenloadIE):
    IE_NAME = 'verystream'
    _DOMAINS = r'(?:verystream\.com|woof\.tube)'
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
                            %s
                        )/
                        (?:stream|e)/
                        (?P<id>[a-zA-Z0-9-_]+)
                    ''' % _DOMAINS
    _EMBED_WORD = 'e'
    _STREAM_WORD = 'stream'
    _REDIR_WORD = 'gettoken'
    _URL_IDS = ('videolink', )
    _TESTS = [{
        'url': 'https://verystream.com/stream/c1GWQ9ngBBx/',
        'md5': 'd3e8c5628ccb9970b65fd65269886795',
        'info_dict': {
            'id': 'c1GWQ9ngBBx',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://verystream.com/e/c1GWQ9ngBBx/',
        'only_matching': True,
    }]
    def _extract_decrypted_page(self, page_url, webpage, video_id):
        """Return webpage unchanged.

        Overrides the OpenloadIE hook of the same name; page_url and video_id
        are accepted for signature compatibility and are not used here.
        """
        return webpage  # for Verystream, the webpage is already decrypted
--- a/youtube_dl/extractor/streamango.py
+++ b/youtube_dl/extractor/streamango.py
@ -1,128 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import compat_chr
 from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    js_to_json,
 )
 class StreamangoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
        'md5': 'e992787515a182f55e38fc97588d802a',
        'info_dict': {
            'id': 'clapasobsptpkdfe',
            'ext': 'mp4',
            'title': '20170315_150006.mp4',
        }
    }, {
        # no og:title
        'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
        'info_dict': {
            'id': 'foqebrpftarclpob',
            'ext': 'mp4',
            'title': 'foqebrpftarclpob',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'gone',
    }, {
        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
        'only_matching': True,
    }, {
        'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
        'only_matching': True,
    }, {
        'url': 'https://streamcherry.com/f/clapasobsptpkdfe/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Build the info dict for a Streamango-style page.

        Every ``{... src: fn('<encoded>', <number>) ...}`` object literal in
        the page is turned into a format (or a set of DASH formats) after
        decoding its source URL.  If nothing is found, an error message taken
        from the page is raised as an expected ExtractorError.
        """
        def decrypt_src(encoded, val):
            # Decode `encoded` four symbols at a time against a custom
            # alphabet; only the first character of each group is XOR-ed
            # with `val`.  An incomplete trailing group raises IndexError.
            alphabet = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
            cleaned = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
            total = len(cleaned)
            pieces = []
            pos = 0
            while pos < total:
                first = alphabet.index(cleaned[pos])
                second = alphabet.index(cleaned[pos + 1])
                third = alphabet.index(cleaned[pos + 2])
                fourth = alphabet.index(cleaned[pos + 3])
                pos += 4
                pieces.append(compat_chr(((first << 0x2) | (second >> 0x4)) ^ val))
                # index 0x40 marks a symbol that contributes no character
                if third != 0x40:
                    pieces.append(compat_chr(((second & 0xf) << 0x4) | (third >> 0x2)))
                if fourth != 0x40:
                    pieces.append(compat_chr(((third & 0x3) << 0x6) | fourth))
            return ''.join(pieces)

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage, default=video_id)

        formats = []
        for literal in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
            src_call = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', literal)
            if src_call is None:
                continue
            # Drop the non-JSON `src: fn(...)` part so the rest can be parsed
            remainder = literal.replace(src_call.group(0), '')
            video = self._parse_json(
                remainder, video_id, transform_source=js_to_json,
                fatal=False)
            if not video:
                video = {}
            call_args = re.search(
                r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
                src_call.group(1))
            if call_args is None:
                continue
            src = decrypt_src(
                call_args.group('src'), int_or_none(call_args.group('val')))
            if not src:
                continue
            ext = determine_ext(src, default_ext=None)
            is_dash = video.get('type') == 'application/dash+xml' or ext == 'mpd'
            if is_dash:
                dash_formats = self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False)
                formats.extend(dash_formats)
                continue
            formats.append({
                'url': src,
                'ext': ext or 'mp4',
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
                'tbr': int_or_none(video.get('bitrate')),
            })

        if not formats:
            # Prefer the page's own message, else a generic one if it apologises
            error = self._search_regex(
                r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
                'error', default=None)
            if not error:
                if '>Sorry' in webpage:
                    error = 'Video %s is not available' % video_id
            if error:
                raise ExtractorError(error, expected=True)

        self._sort_formats(formats)

        info = {
            'id': video_id,
            'url': url,
            'title': title,
            'formats': formats,
        }
        return info