youtube-dl/youtube_dl/extractor/cloudy.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_HTTPError,
)
from ..utils import (
    ExtractorError,
    HEADRequest,
    remove_end,
)


class CloudyIE(InfoExtractor):
    """Extractor for videos hosted on cloudy.ec.

    Extraction happens in two steps: the embed page is downloaded to obtain
    a per-video file key, then the player API is queried with that key to
    obtain the title and the media URL.  The media URL is probed with a HEAD
    request; if the server answers 404 or 410 the dead URL is reported back
    to the API and a replacement is requested, for at most ``_MAX_TRIES``
    API requests in total.
    """

    _IE_DESC = 'cloudy.ec'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?cloudy\.ec/
        (?:v/|embed\.php\?id=)
        (?P<id>[A-Za-z0-9]+)
        '''
    # Embed page that carries the file key; '%s' is replaced by the video id.
    _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
    # Player API endpoint; its response body is parsed as a query string.
    _API_URL = 'http://www.cloudy.ec/api/player.api.php'
    # Upper bound on the number of player API requests made for one video.
    _MAX_TRIES = 2
    _TEST = {
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
        'md5': '5cb253ace826a42f35b4740539bedf07',
        'info_dict': {
            'id': 'af511e2527aac',
            'ext': 'flv',
            'title': 'Funny Cats and Animals Compilation june 2013',
        }
    }

    def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
        """Query the player API and build the info dict for ``video_id``.

        ``file_key`` is the key scraped from the embed page.  ``error_url``
        is a media URL previously returned by the API that turned out to be
        dead; when given, it is reported back to the API together with
        ``try_num``.  ``try_num`` is the number of API requests already made
        for this video.

        Returns a dict with the keys ``id``, ``url`` and ``title``.

        Raises ExtractorError when the retry budget is exhausted, when the
        API reports an error, or when the response contains no media URL.
        """
        if try_num > self._MAX_TRIES - 1:
            raise ExtractorError('Unable to extract video URL', expected=True)

        form = {
            'file': video_id,
            'key': file_key,
        }

        if error_url:
            # Tell the API which URL failed so it can hand out another one.
            form.update({
                'numOfErrors': try_num,
                'errorCode': '404',
                'errorUrl': error_url,
            })

        player_data = self._download_webpage(
            self._API_URL, video_id, 'Downloading player data', query=form)
        # The response is a query string, so every value is a list.
        data = compat_parse_qs(player_data)

        try_num += 1

        if 'error' in data:
            # 'error_msg' is not guaranteed to accompany 'error'; fall back
            # to the raw 'error' value(s) rather than failing with KeyError.
            error_msg = ' '.join(data.get('error_msg') or data['error'])
            raise ExtractorError(
                '%s error: %s' % (self.IE_NAME, error_msg),
                expected=True)

        title = data.get('title', [None])[0]
        if title:
            # Strip the junk suffix the API appends to the title.
            title = remove_end(title, '&asdasdas').strip()

        video_url = data.get('url', [None])[0]
        if not video_url:
            # Fail here with a clear message instead of returning an info
            # dict whose 'url' is None.
            raise ExtractorError('Unable to extract video URL')

        try:
            self._request_webpage(
                HEADRequest(video_url), video_id, 'Checking video URL')
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
                self.report_warning(
                    'Invalid video URL, requesting another', video_id)
                return self._extract_video(
                    video_id, file_key, video_url, try_num)
            # Any other failure of the probe is deliberately ignored: the
            # check is best-effort and the URL is returned as-is.

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
        }

    def _real_extract(self, url):
        """Extract the info dict for the cloudy.ec video at ``url``.

        Downloads the embed page for the video id found in ``url``, scrapes
        the file key from it and delegates to ``_extract_video``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        url = self._EMBED_URL % video_id
        webpage = self._download_webpage(url, video_id)

        # The key appears either as a 'key: "..."' property or as a
        # 'filekey = "..."' assignment in the embed page.
        file_key = self._search_regex(
            [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
            webpage, 'file_key')

        return self._extract_video(video_id, file_key)
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 10:23:40 +00:00			`from ..compat import (`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`compat_parse_qs,`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`compat_HTTPError,`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`)`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 10:23:40 +00:00			`from ..utils import (`
			`ExtractorError,`
			`HEADRequest,`
			`remove_end,`
			`)`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00

			`class CloudyIE(InfoExtractor):`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`_IE_DESC = 'cloudy.ec'`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`_VALID_URL = r'''(?x)`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`https?://(?:www\.)?cloudy\.ec/`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`(?:v/|embed\.php\?id=)`
			`(?P<id>[A-Za-z0-9]+)`
			`'''`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'`
			`_API_URL = 'http://www.cloudy.ec/api/player.api.php'`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`_MAX_TRIES = 2`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`_TEST = {`
			`'url': 'https://www.cloudy.ec/v/af511e2527aac',`
			`'md5': '5cb253ace826a42f35b4740539bedf07',`
			`'info_dict': {`
			`'id': 'af511e2527aac',`
			`'ext': 'flv',`
			`'title': 'Funny Cats and Animals Compilation june 2013',`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`}`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`}`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`def _extract_video(self, video_id, file_key, error_url=None, try_num=0):`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`if try_num > self._MAX_TRIES - 1:`
			`raise ExtractorError('Unable to extract video URL', expected=True)`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`form = {`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`'file': video_id,`
			`'key': file_key,`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`}`

			`if error_url:`
			`form.update({`
			`'numOfErrors': try_num,`
			`'errorCode': '404',`
			`'errorUrl': error_url,`
			`})`

[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`player_data = self._download_webpage(`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`self._API_URL, video_id, 'Downloading player data', query=form)`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`data = compat_parse_qs(player_data)`

[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`try_num += 1`

[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`if 'error' in data:`
			`raise ExtractorError(`
			`'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),`
			`expected=True)`

			`title = data.get('title', [None])[0]`
			`if title:`
[cloudy] Minor changes 2014-09-13 22:01:25 +00:00			`title = remove_end(title, '&asdasdas').strip()`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
[cloudy] Minor changes 2014-09-13 22:01:25 +00:00			`video_url = data.get('url', [None])[0]`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
[cloudy] Minor changes 2014-09-13 22:01:25 +00:00			`if video_url:`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`try:`
			`self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')`
			`except ExtractorError as e:`
			`if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:`
			`self.report_warning('Invalid video URL, requesting another', video_id)`
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`return self._extract_video(video_id, file_key, video_url, try_num)`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00
			`return {`
			`'id': video_id,`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`'url': video_url,`
[cloudy] Add new extractor. Closes #3743 2014-09-13 21:12:36 +00:00			`'title': title,`
			`}`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`

[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`url = self._EMBED_URL % video_id`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00			`webpage = self._download_webpage(url, video_id)`

			`file_key = self._search_regex(`
[cloudy] Fix key extraction (Closes #5211) 2015-03-15 16:42:13 +00:00			`[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],`
			`webpage, 'file_key')`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 12:04:16 +00:00
[cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 2016-07-15 17:21:20 +00:00			`return self._extract_video(video_id, file_key)`