# youtube-dl/youtube_dl/extractor/crunchyroll.py

# coding: utf-8
from __future__ import unicode_literals

import re
import json
import xml.etree.ElementTree as etree
import zlib

from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
from .vrv import VRVIE
from ..compat import (
    compat_b64decode,
    compat_etree_fromstring,
    compat_urllib_parse_urlencode,
    compat_urllib_request,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    extract_attributes,
    float_or_none,
    intlist_to_bytes,
    int_or_none,
    lowercase_escape,
    remove_end,
    sanitized_Request,
    unified_strdate,
    urlencode_postdata,
    xpath_text,
)
from ..aes import (
    aes_cbc_decrypt,
)


class CrunchyrollBaseIE(InfoExtractor):
    """Shared login, RPC and request helpers for the Crunchyroll extractors."""

    # Not referenced by the current login flow; kept so that existing
    # references to these attributes keep working.
    _LOGIN_URL = 'https://www.crunchyroll.com/login'
    _LOGIN_FORM = 'login_form'
    _NETRC_MACHINE = 'crunchyroll'

    def _call_rpc_api(self, method, video_id, note=None, data=None):
        """POST a form-encoded request to the Crunchyroll XML endpoint.

        `method` is appended to 'RpcApi' to form the `req` field; `data`
        holds any additional form fields.  The caller's `data` mapping is
        copied, never modified.  The request is made with `fatal=False`, so
        callers in this file type-check the result before using it.
        """
        payload = dict(data or {})
        payload['req'] = 'RpcApi' + method
        return self._download_xml(
            'https://www.crunchyroll.com/xml/',
            video_id, note, fatal=False,
            data=compat_urllib_parse_urlencode(payload).encode('utf-8'),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
            })

    def _login(self):
        """Submit the configured credentials to the site's form handler.

        Does nothing when no username is available.  The response body is
        not inspected here.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        self._download_webpage(
            'https://www.crunchyroll.com/?a=formhandler',
            None, 'Logging in', 'Wrong login info',
            data=urlencode_postdata({
                'formname': 'RpcApiUser_Login',
                'next_url': 'https://www.crunchyroll.com/acct/membership',
                'name': username,
                'password': password,
            }))

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _download_webpage(self, url_or_request, *args, **kwargs):
        """Download a webpage with `Accept-Language: *` forced on the request.

        Accepts either a URL or a prepared request object; all other
        arguments are forwarded unchanged to the parent implementation.
        """
        if isinstance(url_or_request, compat_urllib_request.Request):
            request = url_or_request
        else:
            request = sanitized_Request(url_or_request)
        # Accept-Language must be set explicitly to accept any language to avoid issues
        # similar to https://github.com/rg3/youtube-dl/issues/6797.
        # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
        # should be imposed or not (from what I can see it just takes the first language
        # ignoring the priority and requires it to correspond the IP). By the way this causes
        # Crunchyroll to not work in georestriction cases in some browsers that don't place
        # the locale lang first in header. However allowing any language seems to workaround the issue.
        request.add_header('Accept-Language', '*')
        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)

    @staticmethod
    def _add_skip_wall(url):
        """Return `url` with the query parameter `skip_wall=1` set (replacing any existing value)."""
        parsed_url = compat_urlparse.urlparse(url)
        qs = compat_urlparse.parse_qs(parsed_url.query)
        # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
        # > This content may be inappropriate for some people.
        # > Are you sure you want to continue?
        # since it's not disabled by default in crunchyroll account's settings.
        # See https://github.com/rg3/youtube-dl/issues/7202.
        qs['skip_wall'] = ['1']
        return compat_urlparse.urlunparse(
            parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))


class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
    # Extractor for individual Crunchyroll media pages (URLs ending in a numeric media id).
    IE_NAME = 'crunchyroll'
    # Named groups: `prefix` (optional "www" or "m" subdomain), `url` (host and path
    # up to and including the numeric id) and `video_id` (the trailing digits).
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
    # Test fixtures: each entry gives a URL together with either the expected
    # `info_dict` fields or an `only_matching` flag; `params` and `skip` are
    # per-case options (presumably consumed by the project's test harness,
    # which is not part of this file).
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        'info_dict': {
            'id': '645513',
            'ext': 'mp4',
            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
            'upload_date': '20131013',
            'url': 're:(?!.*&amp)',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
        'info_dict': {
            'id': '589804',
            'ext': 'flv',
            'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
            'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Danny Choo Network',
            'upload_date': '20120213',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
        'skip': 'Video gone',
    }, {
        'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
        'info_dict': {
            'id': '702409',
            'ext': 'mp4',
            'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
            'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'TV TOKYO',
            'upload_date': '20160508',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
        'info_dict': {
            'id': '727589',
            'ext': 'mp4',
            'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
            'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Kadokawa Pictures Inc.',
            'upload_date': '20170118',
            'series': "KONOSUBA -God's blessing on this wonderful world!",
            'season': "KONOSUBA -God's blessing on this wonderful world! 2",
            'season_number': 2,
            'episode': 'Give Me Deliverance From This Judicial Injustice!',
            'episode_number': 1,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
        'only_matching': True,
    }, {
        # geo-restricted (US), 18+ maturity wall, non-premium available
        'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
        'only_matching': True,
    }, {
        # A description with double quotes
        'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
        'info_dict': {
            'id': '535080',
            'ext': 'mp4',
            'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
            'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
            'uploader': 'Marvelous AQL Inc.',
            'upload_date': '20091021',
        },
        'params': {
            # Just test metadata extraction
            'skip_download': True,
        },
    }, {
        # make sure we can extract an uploader name that's not a link
        'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
        'info_dict': {
            'id': '606899',
            'ext': 'mp4',
            'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
            'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
            'uploader': 'Geneon Entertainment',
            'upload_date': '20120717',
        },
        'params': {
            # just test metadata extraction
            'skip_download': True,
        },
    }, {
        # A video with a vastly different season name compared to the series name
        'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
        'info_dict': {
            'id': '590532',
            'ext': 'mp4',
            'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
            'description': 'Mahiro and Nyaruko talk about official certification.',
            'uploader': 'TV TOKYO',
            'upload_date': '20120305',
            'series': 'Nyarko-san: Another Crawling Chaos',
            'season': 'Haiyoru! Nyaruani (ONA)',
        },
        'params': {
            # Just test metadata extraction
            'skip_download': True,
        },
    }, {
        'url': 'http://www.crunchyroll.com/media-723735',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
        'only_matching': True,
    }]

    # Maps a quality label found on the page (the digits of e.g. "720p") to the
    # (quality, format) pair that _real_extract sends to the RPC API as
    # `video_quality`/`video_encode_quality` and `video_format` respectively.
    _FORMAT_IDS = {
        '360': ('60', '106'),
        '480': ('61', '106'),
        '720': ('62', '106'),
        '1080': ('80', '108'),
    }

    def _decrypt_subtitles(self, data, iv, id):
        """Decrypt and decompress an encrypted subtitle payload.

        `data` and `iv` are base64 encoded; `id` is the subtitle id from which
        the AES-CBC key is derived.  Returns the zlib-decompressed bytes.
        """
        cipher_ints = bytes_to_intlist(compat_b64decode(data))
        iv_ints = bytes_to_intlist(compat_b64decode(iv))
        subtitle_id = int(id)

        def key_prefix_values(count, modulo, start):
            # Additive sequence seeded with the two `start` values; the seeds
            # themselves are dropped and every term is mapped to
            # `term % modulo + 33`.
            sequence = list(start)
            for _ in range(count):
                sequence.append(sequence[-1] + sequence[-2])
            return [term % modulo + 33 for term in sequence[2:]]

        def derive_key(seed):
            magic = int(floor(pow(2, 25) * sqrt(6.9)))
            shifted = (magic ^ seed) << 5
            mixed = seed ^ magic
            scrambled = mixed ^ (mixed >> 3) ^ shifted
            prefix = intlist_to_bytes(key_prefix_values(20, 97, (1, 2)))
            digest = sha1(prefix + str(scrambled).encode('ascii')).digest()
            # Pad the 20-byte SHA-1 digest with zeros up to a 32-byte key
            return bytes_to_intlist(digest) + [0] * 12

        plaintext = intlist_to_bytes(
            aes_cbc_decrypt(cipher_ints, derive_key(subtitle_id), iv_ints))
        return zlib.decompress(plaintext)

    def _convert_subtitles_to_srt(self, sub_root):
        output = ''

        for i, event in enumerate(sub_root.findall('./events/event'), 1):
            start = event.attrib['start'].replace('.', ',')
            end = event.attrib['end'].replace('.', ',')
            text = event.attrib['text'].replace('\\N', '\n')
            output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
        return output

    def _convert_subtitles_to_ass(self, sub_root):
        output = ''

        def ass_bool(strvalue):
            assvalue = '0'
            if strvalue == '1':
                assvalue = '-1'
            return assvalue

        output = '[Script Info]\n'
        output += 'Title: %s\n' % sub_root.attrib['title']
        output += 'ScriptType: v4.00+\n'
        output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
        output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
        output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
        output += """
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
"""
        for style in sub_root.findall('./styles/style'):
            output += 'Style: ' + style.attrib['name']
            output += ',' + style.attrib['font_name']
            output += ',' + style.attrib['font_size']
            output += ',' + style.attrib['primary_colour']
            output += ',' + style.attrib['secondary_colour']
            output += ',' + style.attrib['outline_colour']
            output += ',' + style.attrib['back_colour']
            output += ',' + ass_bool(style.attrib['bold'])
            output += ',' + ass_bool(style.attrib['italic'])
            output += ',' + ass_bool(style.attrib['underline'])
            output += ',' + ass_bool(style.attrib['strikeout'])
            output += ',' + style.attrib['scale_x']
            output += ',' + style.attrib['scale_y']
            output += ',' + style.attrib['spacing']
            output += ',' + style.attrib['angle']
            output += ',' + style.attrib['border_style']
            output += ',' + style.attrib['outline']
            output += ',' + style.attrib['shadow']
            output += ',' + style.attrib['alignment']
            output += ',' + style.attrib['margin_l']
            output += ',' + style.attrib['margin_r']
            output += ',' + style.attrib['margin_v']
            output += ',' + style.attrib['encoding']
            output += '\n'

        output += """
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
        for event in sub_root.findall('./events/event'):
            output += 'Dialogue: 0'
            output += ',' + event.attrib['start']
            output += ',' + event.attrib['end']
            output += ',' + event.attrib['style']
            output += ',' + event.attrib['name']
            output += ',' + event.attrib['margin_l']
            output += ',' + event.attrib['margin_r']
            output += ',' + event.attrib['margin_v']
            output += ',' + event.attrib['effect']
            output += ',' + event.attrib['text']
            output += '\n'

        return output

    def _extract_subtitles(self, subtitle):
        """Parse subtitle XML text and return it converted to SRT and ASS.

        Returns a list of two dicts (`ext` / `data`), SRT first, then ASS.
        """
        root = compat_etree_fromstring(subtitle)
        converters = (
            ('srt', self._convert_subtitles_to_srt),
            ('ass', self._convert_subtitles_to_ass),
        )
        return [
            {'ext': ext, 'data': convert(root)}
            for ext, convert in converters
        ]

    def _get_subtitles(self, video_id, webpage):
        subtitles = {}
        for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
            sub_doc = self._call_rpc_api(
                'Subtitle_GetXml', video_id,
                'Downloading subtitles for ' + sub_name, data={
                    'subtitle_script_id': sub_id,
                })
            if not isinstance(sub_doc, etree.Element):
                continue
            sid = sub_doc.get('id')
            iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
            data = xpath_text(sub_doc, 'data', 'subtitle data')
            if not sid or not iv or not data:
                continue
            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
            if not lang_code:
                continue
            subtitles[lang_code] = self._extract_subtitles(subtitle)
        return subtitles

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for a single media page.

        Formats come from the `vilos.config.media` JSON embedded in the page
        when it lists usable streams; otherwise the XML RPC API is queried
        once per quality label found on the page.

        Returns the info dict for the video.  Raises ExtractorError when the
        page carries a trailer notice or an error message box, and calls
        `raise_login_required` for pages that demand an age-verifying login.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        if mobj.group('prefix') == 'm':
            # For "m." URLs, take the canonical page URL from the mobile page
            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
        else:
            webpage_url = 'http://www.' + mobj.group('url')

        webpage = self._download_webpage(
            self._add_skip_wall(webpage_url), video_id,
            headers=self.geo_verification_headers())
        note_m = self._html_search_regex(
            r'<div class="showmedia-trailer-notice">(.+?)</div>',
            webpage, 'trailer-notice', default='')
        if note_m:
            raise ExtractorError(note_m)

        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
        if mobj:
            msg = json.loads(mobj.group('msg'))
            if msg.get('type') == 'error':
                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)

        if 'To view this, please log in to verify you are 18 or older.' in webpage:
            self.raise_login_required()

        media = self._parse_json(self._search_regex(
            r'vilos\.config\.media\s*=\s*({.+?});',
            webpage, 'vilos media', default='{}'), video_id)
        media_metadata = media.get('metadata') or {}

        language = self._search_regex(
            r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
            webpage, 'language', default=None, group='lang')

        video_title = self._html_search_regex(
            r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
            webpage, 'video_title')
        video_title = re.sub(r' {2,}', ' ', video_title)
        video_description = (self._parse_json(self._html_search_regex(
            r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
        if video_description:
            video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
        video_upload_date = self._html_search_regex(
            [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
            webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
        if video_upload_date:
            video_upload_date = unified_strdate(video_upload_date)
        video_uploader = self._html_search_regex(
            # try looking for both an uploader that's a link and one that's not
            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
            webpage, 'video_uploader', fatal=False)

        formats = []
        # `or []` also covers an explicit null "streams" value in the page JSON
        for stream in media.get('streams') or []:
            audio_lang = stream.get('audio_lang')
            hardsub_lang = stream.get('hardsub_lang')
            vrv_formats = self._extract_vrv_formats(
                stream.get('url'), video_id, stream.get('format'),
                audio_lang, hardsub_lang)
            for f in vrv_formats:
                # Prefer streams without burned-in subtitles
                if not hardsub_lang:
                    f['preference'] = 1
                # One point each for audio / hardsub matching the page language
                language_preference = 0
                if audio_lang == language:
                    language_preference += 1
                if hardsub_lang == language:
                    language_preference += 1
                if language_preference:
                    f['language_preference'] = language_preference
            formats.extend(vrv_formats)
        if not formats:
            # Fallback: work out which qualities the page offers and ask the
            # RPC API for each of them.
            available_fmts = []
            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
                attrs = extract_attributes(a)
                href = attrs.get('href')
                # Skip quality links that point at the free trial page
                if href and '/freetrial' in href:
                    continue
                available_fmts.append(fmt)
            if not available_fmts:
                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
                    available_fmts = re.findall(p, webpage)
                    if available_fmts:
                        break
            if not available_fmts:
                available_fmts = self._FORMAT_IDS.keys()
            video_encode_ids = []

            for fmt in available_fmts:
                format_ids = self._FORMAT_IDS.get(fmt)
                if not format_ids:
                    # The page regexes accept any 3-4 digit quality label;
                    # skip labels without a known RPC mapping instead of
                    # failing with KeyError.
                    continue
                stream_quality, stream_format = format_ids
                video_format = fmt + 'p'
                stream_infos = []
                streamdata = self._call_rpc_api(
                    'VideoPlayer_GetStandardConfig', video_id,
                    'Downloading media info for %s' % video_format, data={
                        'media_id': video_id,
                        'video_format': stream_format,
                        'video_quality': stream_quality,
                        'current_page': url,
                    })
                if isinstance(streamdata, etree.Element):
                    stream_info = streamdata.find('./{default}preload/stream_info')
                    if stream_info is not None:
                        stream_infos.append(stream_info)
                stream_info = self._call_rpc_api(
                    'VideoEncode_GetStreamInfo', video_id,
                    'Downloading stream info for %s' % video_format, data={
                        'media_id': video_id,
                        'video_format': stream_format,
                        'video_encode_quality': stream_quality,
                    })
                if isinstance(stream_info, etree.Element):
                    stream_infos.append(stream_info)
                for stream_info in stream_infos:
                    # The same encode may be reported by both RPC calls and
                    # for several qualities; only handle it once.
                    video_encode_id = xpath_text(stream_info, './video_encode_id')
                    if video_encode_id in video_encode_ids:
                        continue
                    video_encode_ids.append(video_encode_id)

                    video_file = xpath_text(stream_info, './file')
                    if not video_file:
                        continue
                    if video_file.startswith('http'):
                        formats.extend(self._extract_m3u8_formats(
                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
                            m3u8_id='hls', fatal=False))
                        continue

                    video_url = xpath_text(stream_info, './host')
                    if not video_url:
                        continue
                    # <metadata> may be absent; leave the dimensions unset then
                    stream_metadata = stream_info.find('./metadata')
                    height = width = None
                    if stream_metadata is not None:
                        height = int_or_none(xpath_text(stream_metadata, './height'))
                        width = int_or_none(xpath_text(stream_metadata, './width'))
                    format_info = {
                        'format': video_format,
                        'height': height,
                        'width': width,
                    }

                    if '.fplive.net/' in video_url:
                        # Try a direct HTTP URL on a fixed CDN host before
                        # falling back to RTMP
                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
                        parsed_video_url = compat_urlparse.urlparse(video_url)
                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
                            netloc='v.lvlt.crcdn.net',
                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
                        if self._is_valid_url(direct_video_url, video_id, video_format):
                            format_info.update({
                                'format_id': 'http-' + video_format,
                                'url': direct_video_url,
                            })
                            formats.append(format_info)
                            continue

                    format_info.update({
                        'format_id': 'rtmp-' + video_format,
                        'url': video_url,
                        'play_path': video_file,
                        'ext': 'flv',
                    })
                    formats.append(format_info)
        self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))

        metadata = self._call_rpc_api(
            'VideoPlayer_GetMediaMetadata', video_id,
            note='Downloading media info', data={
                'media_id': video_id,
            })

        subtitles = {}
        # `or []` also covers an explicit null "subtitles" value in the page JSON
        for subtitle in media.get('subtitles') or []:
            subtitle_url = subtitle.get('url')
            if not subtitle_url:
                continue
            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
                'url': subtitle_url,
                'ext': subtitle.get('format', 'ass'),
            })
        if not subtitles:
            subtitles = self.extract_subtitles(video_id, webpage)

        # webpage provide more accurate data than series_title from XML
        series = self._html_search_regex(
            r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
            webpage, 'series', fatal=False)

        season = episode = episode_number = thumbnail = None
        # Duration is read from the page's media config (value scaled by
        # 1/1000), so it does not depend on the metadata RPC succeeding.
        duration = float_or_none(media_metadata.get('duration'), 1000)

        if isinstance(metadata, etree.Element):
            season = xpath_text(metadata, 'series_title')
            episode = xpath_text(metadata, 'episode_title')
            episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
            thumbnail = xpath_text(metadata, 'episode_image_url')

        # Fall back to the page's media config for anything the RPC lacked
        if not episode:
            episode = media_metadata.get('title')
        if not episode_number:
            episode_number = int_or_none(media_metadata.get('episode_number'))
        if not thumbnail:
            thumbnail = (media_metadata.get('thumbnail') or {}).get('url')

        season_number = int_or_none(self._search_regex(
            r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
            webpage, 'season number', default=None))

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'duration': duration,
            'thumbnail': thumbnail,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'series': series,
            'season': season,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'subtitles': subtitles,
            'formats': formats,
        }


class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
    """Extract a Crunchyroll show page as a playlist of its episode URLs."""

    IE_NAME = 'crunchyroll:playlist'
    # Optional "www."/"m." prefix, then a single path segment captured as the
    # show id. The negative lookahead rejects the listed site sections
    # (news, forum, store, login, ...) and "media-<digits>" paths.
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'

    _TESTS = [
        {
            'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
            'info_dict': {
                'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
                'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi',
            },
            'playlist_count': 13,
        },
        {
            # geo-restricted (US), 18+ maturity wall, non-premium available
            'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
            'info_dict': {
                'id': 'cosplay-complex-ova',
                'title': 'Cosplay Complex OVA',
            },
            'playlist_count': 3,
            'skip': 'Georestricted',
        },
        {
            # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
            'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the playlist info dict for the show page at ``url``.

        Downloads the show page, reads the show title from the first
        ``<h1>``/``<span itemprop="name">`` pair and collects one URL result
        per ``showview_videos_media_<id>`` list item found in the markup.

        Returns a dict with ``_type`` set to ``'playlist'`` plus ``id``,
        ``title`` and ``entries``.
        """
        playlist_id = self._match_id(url)

        # The request URL gets skip_wall forced on before downloading.
        page_url = self._add_skip_wall(url)
        show_page = self._download_webpage(
            page_url, playlist_id, headers=self.geo_verification_headers())

        playlist_title = self._html_search_regex(
            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
            show_page, 'title')

        # Each match is a (media id, href of the first link inside the item) pair.
        media_links = re.findall(
            r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
            show_page)

        # Entries are emitted in the reverse of their order in the page markup.
        playlist_entries = [
            self.url_result(
                'http://www.crunchyroll.com' + episode_path, 'Crunchyroll', media_id)
            for media_id, episode_path in reversed(media_links)
        ]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': playlist_entries,
        }
-												Unify coding cookie

											
										
										
											2016-10-02 11:39:18 +00:00
+								# coding: utf-8
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								from __future__ import unicode_literals
-												[crunchyroll] Tidy and modernize
											
										
										
											2014-02-25 13:26:11 +00:00
+								import re
-												[crunchyroll] Handle error message
											
										
										
											2014-02-25 13:29:16 +00:00
+								import json
-												[crunchyroll] Improve extraction failsafeness (closes #17991)

											
										
										
											2018-10-28 15:12:54 +00:00
+								import xml.etree.ElementTree as etree
-												[crunchyroll] Tidy and modernize
											
										
										
											2014-02-25 13:26:11 +00:00
+								import zlib
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								from hashlib import sha1
 								from math import pow, sqrt, floor
-												[crunchyroll] limit VRVIE inheritance to CrunchyrollIE

											
										
										
											2018-09-01 09:04:10 +00:00
+								from .common import InfoExtractor
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								from .vrv import VRVIE
-												Fix imports and general cleanup

· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent valid URL (always match the whole protocol, no ^ at start required)
· Use modern test definitions

											
										
										
											2014-12-13 11:24:42 +00:00
+								from ..compat import (
-												Switch codebase to use compat_b64decode

											
										
										
											2018-01-23 15:23:12 +00:00
+								    compat_b64decode,
-												Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178)

Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes).

											
										
										
											2015-10-25 19:04:55 +00:00
+								    compat_etree_fromstring,
-												[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict

encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode
All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode

Closes #8974

											
										
										
											2016-03-25 19:46:57 +00:00
+								    compat_urllib_parse_urlencode,
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								    compat_urllib_request,
-												[crunchyroll] Workaround fplive.net rtmp URLs (Closes #5881)

											
										
										
											2015-08-18 17:02:57 +00:00
+								    compat_urlparse,
-												Fix imports and general cleanup

· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent valid URL (always match the whole protocol, no ^ at start required)
· Use modern test definitions

											
										
										
											2014-12-13 11:24:42 +00:00
+								)
 								from ..utils import (
 								    ExtractorError,
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								    bytes_to_intlist,
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								    extract_attributes,
 								    float_or_none,
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								    intlist_to_bytes,
-												[crunchyroll] Extract width and height (closes #6749)

											
										
										
											2015-09-03 11:15:02 +00:00
+								    int_or_none,
-												[crunchyroll] Fix description extraction

											
										
										
											2015-11-07 14:02:39 +00:00
+								    lowercase_escape,
-												[crunchyroll] Workaround fplive.net rtmp URLs (Closes #5881)

											
										
										
											2015-08-18 17:02:57 +00:00
+								    remove_end,
-												Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8

											
										
										
											2015-11-21 16:18:17 +00:00
+								    sanitized_Request,
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								    unified_strdate,
-												Add login support to Crunchyroll extractor
											
										
										
											2014-08-29 21:32:03 +00:00
+								    urlencode_postdata,
-												[crunchyroll] Extract width and height (closes #6749)

											
										
										
											2015-09-03 11:15:02 +00:00
+								    xpath_text,
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								)
 								from ..aes import (
 								    aes_cbc_decrypt,
 								)
-												[crunchyroll] Tidy and modernize
											
										
										
											2014-02-25 13:26:11 +00:00
-												[crunchyroll] limit VRVIE inheritance to CrunchyrollIE

											
										
										
											2018-09-01 09:04:10 +00:00
+								class CrunchyrollBaseIE(InfoExtractor):
-												[crunchyroll] Fix authentication (Closes #10655)

											
										
										
											2016-09-15 14:53:35 +00:00
+								    _LOGIN_URL = 'https://www.crunchyroll.com/login'
 								    _LOGIN_FORM = 'login_form'
-												[crunchyroll] Bypass maturity wall (Closes #7202)

											
										
										
											2015-10-18 00:57:57 +00:00
+								    _NETRC_MACHINE = 'crunchyroll'
-												[crunchyroll] extract old rtmp formats

											
										
										
											2017-11-13 18:15:16 +00:00
+								    def _call_rpc_api(self, method, video_id, note=None, data=None):
 								        data = data or {}
 								        data['req'] = 'RpcApi' + method
 								        data = compat_urllib_parse_urlencode(data).encode('utf-8')
 								        return self._download_xml(
-												[crunchyroll] switch to HTTPS for RpcApi(closes #17749)

											
										
										
											2018-10-02 18:43:06 +00:00
+								            'https://www.crunchyroll.com/xml/',
-												[crunchyroll] extract old rtmp formats

											
										
										
											2017-11-13 18:15:16 +00:00
+								            video_id, note, fatal=False, data=data, headers={
 								                'Content-Type': 'application/x-www-form-urlencoded',
 								            })
-												[crunchyroll] Bypass maturity wall (Closes #7202)

											
										
										
											2015-10-18 00:57:57 +00:00
+								    def _login(self):
-												remove unnecessary assignment parenthesis

											
										
										
											2018-05-26 15:12:44 +00:00
+								        username, password = self._get_login_info()
-												[crunchyroll] Bypass maturity wall (Closes #7202)

											
										
										
											2015-10-18 00:57:57 +00:00
+								        if username is None:
 								            return
-												[crunchyroll] Fix authentication (Closes #10655)

											
										
										
											2016-09-15 14:53:35 +00:00
-												use older login method(closes #11572)

											
										
										
											2017-11-08 19:30:05 +00:00
+								        self._download_webpage(
 								            'https://www.crunchyroll.com/?a=formhandler',
 								            None, 'Logging in', 'Wrong login info',
 								            data=urlencode_postdata({
 								                'formname': 'RpcApiUser_Login',
 								                'next_url': 'https://www.crunchyroll.com/acct/membership',
 								                'name': username,
 								                'password': password,
 								            }))
 								        '''
-												[crunchyroll] Fix authentication (Closes #10655)

											
										
										
											2016-09-15 14:53:35 +00:00
+								        login_page = self._download_webpage(
 								            self._LOGIN_URL, None, 'Downloading login page')
-												[crunchyroll] Check if already logged in (Closes #10700)

											
										
										
											2016-09-18 19:50:06 +00:00
+								        def is_logged(webpage):
 								            return '<title>Redirecting' in webpage
 								        # Already logged in
 								        if is_logged(login_page):
 								            return
-												[crunchyroll] Fix authentication (Closes #10655)

											
										
										
											2016-09-15 14:53:35 +00:00
+								        login_form_str = self._search_regex(
 								            r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
 								            login_page, 'login form', group='form')
 								        post_url = extract_attributes(login_form_str).get('action')
 								        if not post_url:
 								            post_url = self._LOGIN_URL
 								        elif not post_url.startswith('http'):
 								            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
 								        login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
 								        login_form.update({
 								            'login_form[name]': username,
 								            'login_form[password]': password,
-												[crunchyroll] Bypass maturity wall (Closes #7202)

											
										
										
											2015-10-18 00:57:57 +00:00
+								        })
-												[crunchyroll] Fix authentication (Closes #10655)

											
										
										
											2016-09-15 14:53:35 +00:00
 								        response = self._download_webpage(
 								            post_url, None, 'Logging in', 'Wrong login info',
 								            data=urlencode_postdata(login_form),
 								            headers={'Content-Type': 'application/x-www-form-urlencoded'})
 								        # Successful login
-												[crunchyroll] Check if already logged in (Closes #10700)

											
										
										
											2016-09-18 19:50:06 +00:00
+								        if is_logged(response):
-												[crunchyroll] Fix authentication (Closes #10655)

											
										
										
											2016-09-15 14:53:35 +00:00
+								            return
 								        error = self._html_search_regex(
 								            '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
 								            response, 'error message', default=None)
 								        if error:
 								            raise ExtractorError('Unable to login: %s' % error, expected=True)
 								        raise ExtractorError('Unable to log in')
-												use older login method(closes #11572)

											
										
										
											2017-11-08 19:30:05 +00:00
+								        '''
-												[crunchyroll] Bypass maturity wall (Closes #7202)

											
										
										
											2015-10-18 00:57:57 +00:00
 								    def _real_initialize(self):
 								        self._login()
-												[crunchyroll] Fix custom _download_webpage (Closes #8883)

											
										
										
											2016-03-17 16:55:04 +00:00
+								    def _download_webpage(self, url_or_request, *args, **kwargs):
-												[crunchyroll] Force Accept-Language to any for all requests (Closes #6797)

											
										
										
											2015-09-08 08:11:20 +00:00
+								        request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
-												Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8

											
										
										
											2015-11-21 16:18:17 +00:00
+								                   else sanitized_Request(url_or_request))
-												[crunchyroll] Force Accept-Language to any for all requests (Closes #6797)

											
										
										
											2015-09-08 08:11:20 +00:00
+								        # Accept-Language must be set explicitly to accept any language to avoid issues
 								        # similar to https://github.com/rg3/youtube-dl/issues/6797.
 								        # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
 								        # should be imposed or not (from what I can see it just takes the first language
 								        # ignoring the priority and requires it to correspond the IP). By the way this causes
 								        # Crunchyroll to not work in georestriction cases in some browsers that don't place
 								        # the locale lang first in header. However allowing any language seems to workaround the issue.
 								        request.add_header('Accept-Language', '*')
-												[crunchyroll] Fix custom _download_webpage (Closes #8883)

											
										
										
											2016-03-17 16:55:04 +00:00
+								        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-												[crunchyroll] Force Accept-Language to any for all requests (Closes #6797)

											
										
										
											2015-09-08 08:11:20 +00:00
-												[crunchyroll] Bypass maturity wall (Closes #7202)

											
										
										
											2015-10-18 00:57:57 +00:00
+								    @staticmethod
 								    def _add_skip_wall(url):
 								        parsed_url = compat_urlparse.urlparse(url)
 								        qs = compat_urlparse.parse_qs(parsed_url.query)
 								        # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
 								        # > This content may be inappropriate for some people.
 								        # > Are you sure you want to continue?
 								        # since it's not disabled by default in crunchyroll account's settings.
 								        # See https://github.com/rg3/youtube-dl/issues/7202.
 								        qs['skip_wall'] = ['1']
 								        return compat_urlparse.urlunparse(
-												[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict

encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode
All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode

Closes #8974

											
										
										
											2016-03-25 19:46:57 +00:00
+								            parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-												[crunchyroll] Bypass maturity wall (Closes #7202)

											
										
										
											2015-10-18 00:57:57 +00:00
-												[crunchyroll] Force Accept-Language to any for all requests (Closes #6797)

											
										
										
											2015-09-08 08:11:20 +00:00
-												[crunchyroll] limit VRVIE inheritance to CrunchyrollIE

											
										
										
											2018-09-01 09:04:10 +00:00
+								class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
 								    IE_NAME = 'crunchyroll'
-												[crunchyroll] Extend _VALID_URL (closes #18955)

											
										
										
											2019-01-22 20:55:41 +00:00
+								    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
-												[crunchyroll] Add .fr domain (#4537)

											
										
										
											2014-12-21 18:58:15 +00:00
+								    _TESTS = [{
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
 								        'info_dict': {
-												[crunchyroll] Tidy and modernize
											
										
										
											2014-02-25 13:26:11 +00:00
+								            'id': '645513',
-												[crunchyroll] Fix descriptions with double quotes (closes #12124)

											
										
										
											2017-02-22 16:08:45 +00:00
+								            'ext': 'mp4',
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
 								            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								            'thumbnail': r're:^https?://.*\.jpg$',
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
 								            'upload_date': '20131013',
-												[crunchyroll] Correct parsing (Fixes #4014)

											
										
										
											2014-10-23 21:25:02 +00:00
+								            'url': 're:(?!.*&amp)',
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        },
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								        'params': {
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								            # rtmp
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								            'skip_download': True,
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        },
-												[crunchyroll] Fix extraction (Closes #5855, closes #5881)

											
										
										
											2015-07-05 00:29:36 +00:00
+								    }, {
 								        'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
 								        'info_dict': {
 								            'id': '589804',
 								            'ext': 'flv',
 								            'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
-												[crunchyroll] Fix description extraction

											
										
										
											2015-11-07 14:02:39 +00:00
+								            'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
-												Fix "invalid escape sequences" error on Python 3.6

											
										
										
											2017-01-02 12:08:07 +00:00
+								            'thumbnail': r're:^https?://.*\.jpg$',
-												[crunchyroll] Fix extraction (Closes #5855, closes #5881)

											
										
										
											2015-07-05 00:29:36 +00:00
+								            'uploader': 'Danny Choo Network',
 								            'upload_date': '20120213',
 								        },
 								        'params': {
 								            # rtmp
 								            'skip_download': True,
 								        },
-												[crunchyroll] Skip an invalid _TEST

											
										
										
											2016-10-15 06:36:07 +00:00
+								        'skip': 'Video gone',
-												[crunchyroll] Add support for HLS (Closes #10301)

											
										
										
											2016-08-11 17:56:16 +00:00
+								    }, {
 								        'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
 								        'info_dict': {
 								            'id': '702409',
 								            'ext': 'mp4',
 								            'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
 								            'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
-												Fix "invalid escape sequences" error on Python 3.6

											
										
										
											2017-01-02 12:08:07 +00:00
+								            'thumbnail': r're:^https?://.*\.jpg$',
-												[crunchyroll] Add support for HLS (Closes #10301)

											
										
										
											2016-08-11 17:56:16 +00:00
+								            'uploader': 'TV TOKYO',
 								            'upload_date': '20160508',
 								        },
 								        'params': {
 								            # m3u8 download
 								            'skip_download': True,
 								        },
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								    }, {
 								        'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
 								        'info_dict': {
 								            'id': '727589',
 								            'ext': 'mp4',
-												[crunchyroll] Update test


											
										
										
											2017-05-01 17:56:51 +00:00
+								            'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								            'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
 								            'thumbnail': r're:^https?://.*\.jpg$',
 								            'uploader': 'Kadokawa Pictures Inc.',
 								            'upload_date': '20170118',
 								            'series': "KONOSUBA -God's blessing on this wonderful world!",
-												[crunchyroll] Extract season name


											
										
										
											2017-03-12 04:18:10 +00:00
+								            'season': "KONOSUBA -God's blessing on this wonderful world! 2",
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								            'season_number': 2,
-												[crunchyroll] Update test


											
										
										
											2017-05-01 17:56:51 +00:00
+								            'episode': 'Give Me Deliverance From This Judicial Injustice!',
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								            'episode_number': 1,
 								        },
 								        'params': {
 								            # m3u8 download
 								            'skip_download': True,
 								        },
-												[crunchyroll] Add .fr domain (#4537)

											
										
										
											2014-12-21 18:58:15 +00:00
+								    }, {
 								        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
 								        'only_matching': True,
-												[crunchyroll] Add maturity wall reference tests (#7202)

											
										
										
											2015-10-18 01:06:47 +00:00
+								    }, {
 								        # geo-restricted (US), 18+ maturity wall, non-premium available
 								        'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
 								        'only_matching': True,
-												[crunchyroll] Fix descriptions with double quotes (closes #12124)

											
										
										
											2017-02-22 16:08:45 +00:00
+								    }, {
 								        # A description with double quotes
 								        'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
 								        'info_dict': {
 								            'id': '535080',
 								            'ext': 'mp4',
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								            'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
-												[crunchyroll] Fix descriptions with double quotes (closes #12124)

											
										
										
											2017-02-22 16:08:45 +00:00
+								            'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
 								            'uploader': 'Marvelous AQL Inc.',
 								            'upload_date': '20091021',
 								        },
 								        'params': {
 								            # Just test metadata extraction
 								            'skip_download': True,
 								        },
-												[crunchyroll] Extract uploader name that's not a link

Provide the Crunchyroll extractor with the ability to extract uploader
names that aren't links. Add a test for this new functionality.
This fixes #12267.
											
										
										
											2017-02-26 11:08:10 +00:00
+								    }, {
 								        # make sure we can extract an uploader name that's not a link
 								        'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
 								        'info_dict': {
 								            'id': '606899',
 								            'ext': 'mp4',
 								            'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
 								            'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
 								            'uploader': 'Geneon Entertainment',
 								            'upload_date': '20120717',
 								        },
 								        'params': {
 								            # just test metadata extraction
 								            'skip_download': True,
 								        },
-												[crunchyroll] Extract season name


											
										
										
											2017-03-12 04:18:10 +00:00
+								    }, {
 								        # A video with a vastly different season name compared to the series name
 								        'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
 								        'info_dict': {
 								            'id': '590532',
 								            'ext': 'mp4',
 								            'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
 								            'description': 'Mahiro and Nyaruko talk about official certification.',
 								            'uploader': 'TV TOKYO',
 								            'upload_date': '20120305',
 								            'series': 'Nyarko-san: Another Crawling Chaos',
 								            'season': 'Haiyoru! Nyaruani (ONA)',
 								        },
 								        'params': {
 								            # Just test metadata extraction
 								            'skip_download': True,
 								        },
-												[crunchyroll:playlist] Restrict _VALID_URL (closes #17069)


											
										
										
											2018-07-28 23:56:52 +00:00
+								    }, {
 								        'url': 'http://www.crunchyroll.com/media-723735',
 								        'only_matching': True,
-												[crunchyroll] Extend _VALID_URL (closes #18955)

											
										
										
											2019-01-22 20:55:41 +00:00
+								    }, {
 								        'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
 								        'only_matching': True,
-												[crunchyroll] Add .fr domain (#4537)

											
										
										
											2014-12-21 18:58:15 +00:00
+								    }]
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
 								    _FORMAT_IDS = {
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								        '360': ('60', '106'),
 								        '480': ('61', '106'),
 								        '720': ('62', '106'),
 								        '1080': ('80', '108'),
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								    }
 								    def _decrypt_subtitles(self, data, iv, id):
-												Switch codebase to use compat_b64decode

											
										
										
											2018-01-23 15:23:12 +00:00
+								        data = bytes_to_intlist(compat_b64decode(data))
 								        iv = bytes_to_intlist(compat_b64decode(iv))
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        id = int(id)
 								        def obfuscate_key_aux(count, modulo, start):
 								            output = list(start)
 								            for _ in range(count):
 								                output.append(output[-1] + output[-2])
 								            # cut off start values
 								            output = output[2:]
 								            output = list(map(lambda x: x % modulo + 33, output))
 								            return output
 								        def obfuscate_key(key):
 								            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
 								            num2 = (num1 ^ key) << 5
 								            num3 = key ^ num1
 								            num4 = num3 ^ (num3 >> 3) ^ num2
 								            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								            shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								            # Extend 160 Bit hash to 256 Bit
 								            return shaHash + [0] * 12
-												[crunchyroll] Tidy and modernize
											
										
										
											2014-02-25 13:26:11 +00:00
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        key = obfuscate_key(id)
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
 								        return zlib.decompress(decrypted_data)
-												[crunchyroll] Fix building of ass subtitles (reported in #4019)

Parse the xml document instead of using regexes, otherwise unicode characters are left unescaped.

											
										
										
											2014-10-29 20:19:20 +00:00
+								    def _convert_subtitles_to_srt(self, sub_root):
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								        output = ''
-												[crunchycroll] Fix building of ass subtitles (reported in #4019)

Parse the xml document instead of using regexes, otherwise unicode characters are left unescaped.

											
										
										
											2014-10-29 20:19:20 +00:00
 								        for i, event in enumerate(sub_root.findall('./events/event'), 1):
 								            start = event.attrib['start'].replace('.', ',')
 								            end = event.attrib['end'].replace('.', ',')
 								            text = event.attrib['text'].replace('\\N', '\n')
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								            output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        return output
-												[crunchyroll] Fix building of ass subtitles (reported in #4019)

Parse the xml document instead of using regexes, otherwise unicode characters are left unescaped.

											
										
										
											2014-10-29 20:19:20 +00:00
+								    def _convert_subtitles_to_ass(self, sub_root):
-												Add SSA (.ass) subtitle output for Crunchyroll
											
										
										
											2014-08-30 11:48:56 +00:00
+								        output = ''
 								        def ass_bool(strvalue):
 								            assvalue = '0'
 								            if strvalue == '1':
 								                assvalue = '-1'
 								            return assvalue
 								        output = '[Script Info]\n'
-												[refactor] Single quotes consistency

											
										
										
											2016-02-14 09:37:17 +00:00
+								        output += 'Title: %s\n' % sub_root.attrib['title']
-												Add SSA (.ass) subtitle output for Crunchyroll
											
										
										
											2014-08-30 11:48:56 +00:00
+								        output += 'ScriptType: v4.00+\n'
-												[refactor] Single quotes consistency

											
										
										
											2016-02-14 09:37:17 +00:00
+								        output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
 								        output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
 								        output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
-												[crunchyroll] Remove ScaledBorderAndShadow settings

See https://github.com/rg3/youtube-dl/pull/9028, especially @lachs0r's
comments for the reason behind this change

											
										
										
											2017-02-01 08:39:32 +00:00
+								        output += """
-												Add SSA (.ass) subtitle output for Crunchyroll
											
										
										
											2014-08-30 11:48:56 +00:00
+								[V4+ Styles]
 								Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
 								"""
 								        for style in sub_root.findall('./styles/style'):
-												[refactor] Single quotes consistency

											
										
										
											2016-02-14 09:37:17 +00:00
+								            output += 'Style: ' + style.attrib['name']
 								            output += ',' + style.attrib['font_name']
 								            output += ',' + style.attrib['font_size']
 								            output += ',' + style.attrib['primary_colour']
 								            output += ',' + style.attrib['secondary_colour']
 								            output += ',' + style.attrib['outline_colour']
 								            output += ',' + style.attrib['back_colour']
 								            output += ',' + ass_bool(style.attrib['bold'])
 								            output += ',' + ass_bool(style.attrib['italic'])
 								            output += ',' + ass_bool(style.attrib['underline'])
 								            output += ',' + ass_bool(style.attrib['strikeout'])
 								            output += ',' + style.attrib['scale_x']
 								            output += ',' + style.attrib['scale_y']
 								            output += ',' + style.attrib['spacing']
 								            output += ',' + style.attrib['angle']
 								            output += ',' + style.attrib['border_style']
 								            output += ',' + style.attrib['outline']
 								            output += ',' + style.attrib['shadow']
 								            output += ',' + style.attrib['alignment']
 								            output += ',' + style.attrib['margin_l']
 								            output += ',' + style.attrib['margin_r']
 								            output += ',' + style.attrib['margin_v']
 								            output += ',' + style.attrib['encoding']
-												Add SSA (.ass) subtitle output for Crunchyroll
											
										
										
											2014-08-30 11:48:56 +00:00
+								            output += '\n'
 								        output += """
 								[Events]
 								Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 								"""
 								        for event in sub_root.findall('./events/event'):
 								            output += 'Dialogue: 0'
-												[refactor] Single quotes consistency

											
										
										
											2016-02-14 09:37:17 +00:00
+								            output += ',' + event.attrib['start']
 								            output += ',' + event.attrib['end']
 								            output += ',' + event.attrib['style']
 								            output += ',' + event.attrib['name']
 								            output += ',' + event.attrib['margin_l']
 								            output += ',' + event.attrib['margin_r']
 								            output += ',' + event.attrib['margin_v']
 								            output += ',' + event.attrib['effect']
 								            output += ',' + event.attrib['text']
-												Add SSA (.ass) subtitle output for Crunchyroll
											
										
										
											2014-08-30 11:48:56 +00:00
+								            output += '\n'
 								        return output
-												[crunchyroll] Extract subtitles extraction routine

											
										
										
											2015-05-30 08:12:58 +00:00
+								    def _extract_subtitles(self, subtitle):
-												Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178)

Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes).

											
										
										
											2015-10-25 19:04:55 +00:00
+								        sub_root = compat_etree_fromstring(subtitle)
-												[crunchyroll] Extract subtitles extraction routine

											
										
										
											2015-05-30 08:12:58 +00:00
+								        return [{
 								            'ext': 'srt',
 								            'data': self._convert_subtitles_to_srt(sub_root),
 								        }, {
 								            'ext': 'ass',
 								            'data': self._convert_subtitles_to_ass(sub_root),
 								        }]
-												[crunchyroll] Convert to new subtitles system

											
										
										
											2015-02-15 17:21:42 +00:00
+								    def _get_subtitles(self, video_id, webpage):
 								        subtitles = {}
-												[crunchyroll] Improve subtitle regex (Closes #7262)

											
										
										
											2015-10-22 14:34:11 +00:00
+								        for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
-												[crunchyroll] extract old rtmp formats

											
										
										
											2017-11-13 18:15:16 +00:00
+								            sub_doc = self._call_rpc_api(
 								                'Subtitle_GetXml', video_id,
 								                'Downloading subtitles for ' + sub_name, data={
 								                    'subtitle_script_id': sub_id,
 								                })
-												[crunchyroll] Improve extraction failsafeness (closes #17991)

											
										
										
											2018-10-28 15:12:54 +00:00
+								            if not isinstance(sub_doc, etree.Element):
-												[crunchyroll] Convert to new subtitles system

											
										
										
											2015-02-15 17:21:42 +00:00
+								                continue
-												[crunchyroll] extract old rtmp formats

											
										
										
											2017-11-13 18:15:16 +00:00
+								            sid = sub_doc.get('id')
 								            iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
 								            data = xpath_text(sub_doc, 'data', 'subtitle data')
 								            if not sid or not iv or not data:
 								                continue
 								            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
-												[crunchyroll] Convert to new subtitles system

											
										
										
											2015-02-15 17:21:42 +00:00
+								            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
 								            if not lang_code:
 								                continue
-												[crunchyroll] Extract subtitles extraction routine

											
										
										
											2015-05-30 08:12:58 +00:00
+								            subtitles[lang_code] = self._extract_subtitles(subtitle)
-												[crunchyroll] Convert to new subtitles system

											
										
										
											2015-02-15 17:21:42 +00:00
+								        return subtitles
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
+								    def _real_extract(self, url):
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        mobj = re.match(self._VALID_URL, url)
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								        video_id = mobj.group('video_id')
 								        if mobj.group('prefix') == 'm':
 								            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
 								            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
 								        else:
 								            webpage_url = 'http://www.' + mobj.group('url')
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
-												[crunchyroll] pass geo verification proxy

											
										
										
											2017-04-01 08:33:23 +00:00
+								        webpage = self._download_webpage(
 								            self._add_skip_wall(webpage_url), video_id,
 								            headers=self.geo_verification_headers())
-												[crunchyroll] Detect required login (#6677)

											
										
										
											2015-08-26 14:47:57 +00:00
+								        note_m = self._html_search_regex(
 								            r'<div class="showmedia-trailer-notice">(.+?)</div>',
 								            webpage, 'trailer-notice', default='')
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        if note_m:
 								            raise ExtractorError(note_m)
-												[crunchyroll] Handle error message
											
										
										
											2014-02-25 13:29:16 +00:00
+								        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
 								        if mobj:
 								            msg = json.loads(mobj.group('msg'))
 								            if msg.get('type') == 'error':
 								                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
-												[crunchyroll] Detect required login (#6677)

											
										
										
											2015-08-26 14:47:57 +00:00
 								        if 'To view this, please log in to verify you are 18 or older.' in webpage:
-												[crunchyroll] Fix typo

											
										
										
											2015-08-26 15:27:57 +00:00
+								            self.raise_login_required()
-												[crunchyroll] Handle error message
											
										
										
											2014-02-25 13:29:16 +00:00
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								        media = self._parse_json(self._search_regex(
 								            r'vilos\.config\.media\s*=\s*({.+?});',
 								            webpage, 'vilos media', default='{}'), video_id)
 								        media_metadata = media.get('metadata') or {}
-												[crunchyroll] Prefer hardsubless formats and formats in locale language

											
										
										
											2018-09-16 16:54:25 +00:00
+								        language = self._search_regex(
 								            r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
 								            webpage, 'language', default=None, group='lang')
-												[crunchyroll] Fix title extraction (Closes #7396)

											
										
										
											2015-11-07 13:25:59 +00:00
+								        video_title = self._html_search_regex(
 								            r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
 								            webpage, 'video_title')
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								        video_title = re.sub(r' {2,}', ' ', video_title)
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								        video_description = (self._parse_json(self._html_search_regex(
-												[crunchyroll] Fix descriptions with double quotes (closes #12124)

											
										
										
											2017-02-22 16:08:45 +00:00
+								            r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
-												[crunchyroll] Fix description extraction

											
										
										
											2015-11-07 14:02:39 +00:00
+								        if video_description:
 								            video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
-												[crunchyroll] Fix uploader and upload date extraction

											
										
										
											2015-09-08 08:37:53 +00:00
+								        video_upload_date = self._html_search_regex(
 								            [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
 								            webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        if video_upload_date:
 								            video_upload_date = unified_strdate(video_upload_date)
-												[crunchyroll] Fix uploader and upload date extraction

											
										
										
											2015-09-08 08:37:53 +00:00
+								        video_uploader = self._html_search_regex(
-												[crunchyroll] Extract uploader name that's not a link

Provide the Crunchyroll extractor with the ability to extract uploader
names that aren't links. Add a test for this new functionality.
This fixes #12267.
											
										
										
											2017-02-26 11:08:10 +00:00
+								            # try looking for both an uploader that's a link and one that's not
 								            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
 								            webpage, 'video_uploader', fatal=False)
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
-												[crunchyroll] reduce requests for formats extraction

											
										
										
											2016-04-29 10:46:42 +00:00
+								        formats = []
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								        for stream in media.get('streams', []):
-												[crunchyroll] Prefer hardsubless formats and formats in locale language

											
										
										
											2018-09-16 16:54:25 +00:00
+								            audio_lang = stream.get('audio_lang')
 								            hardsub_lang = stream.get('hardsub_lang')
 								            vrv_formats = self._extract_vrv_formats(
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								                stream.get('url'), video_id, stream.get('format'),
-												[crunchyroll] Prefer hardsubless formats and formats in locale language

											
										
										
											2018-09-16 16:54:25 +00:00
+								                audio_lang, hardsub_lang)
 								            for f in vrv_formats:
 								                if not hardsub_lang:
 								                    f['preference'] = 1
 								                language_preference = 0
 								                if audio_lang == language:
 								                    language_preference += 1
 								                if hardsub_lang == language:
 								                    language_preference += 1
 								                if language_preference:
 								                    f['language_preference'] = language_preference
 								            formats.extend(vrv_formats)
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								        if not formats:
 								            available_fmts = []
 								            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
 								                attrs = extract_attributes(a)
 								                href = attrs.get('href')
 								                if href and '/freetrial' in href:
 								                    continue
 								                available_fmts.append(fmt)
 								            if not available_fmts:
 								                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
 								                    available_fmts = re.findall(p, webpage)
 								                    if available_fmts:
 								                        break
 								            if not available_fmts:
 								                available_fmts = self._FORMAT_IDS.keys()
 								            video_encode_ids = []
 								            for fmt in available_fmts:
 								                stream_quality, stream_format = self._FORMAT_IDS[fmt]
 								                video_format = fmt + 'p'
 								                stream_infos = []
 								                streamdata = self._call_rpc_api(
 								                    'VideoPlayer_GetStandardConfig', video_id,
 								                    'Downloading media info for %s' % video_format, data={
 								                        'media_id': video_id,
 								                        'video_format': stream_format,
 								                        'video_quality': stream_quality,
 								                        'current_page': url,
 								                    })
-												[crunchyroll] Improve extraction failsafeness (closes #17991)

											
										
										
											2018-10-28 15:12:54 +00:00
+								                if isinstance(streamdata, etree.Element):
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								                    stream_info = streamdata.find('./{default}preload/stream_info')
 								                    if stream_info is not None:
 								                        stream_infos.append(stream_info)
 								                stream_info = self._call_rpc_api(
 								                    'VideoEncode_GetStreamInfo', video_id,
 								                    'Downloading stream info for %s' % video_format, data={
 								                        'media_id': video_id,
 								                        'video_format': stream_format,
 								                        'video_encode_quality': stream_quality,
 								                    })
-												[crunchyroll] Improve extraction failsafeness (closes #17991)

											
										
										
											2018-10-28 15:12:54 +00:00
+								                if isinstance(stream_info, etree.Element):
-												[crunchyroll] extract old rtmp formats

											
										
										
											2017-11-13 18:15:16 +00:00
+								                    stream_infos.append(stream_info)
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								                for stream_info in stream_infos:
 								                    video_encode_id = xpath_text(stream_info, './video_encode_id')
 								                    if video_encode_id in video_encode_ids:
 								                        continue
 								                    video_encode_ids.append(video_encode_id)
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								                    video_file = xpath_text(stream_info, './file')
 								                    if not video_file:
 								                        continue
 								                    if video_file.startswith('http'):
 								                        formats.extend(self._extract_m3u8_formats(
 								                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
 								                            m3u8_id='hls', fatal=False))
-												[crunchyroll] extract old rtmp formats

											
										
										
											2017-11-13 18:15:16 +00:00
+								                        continue
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								                    video_url = xpath_text(stream_info, './host')
 								                    if not video_url:
 								                        continue
 								                    metadata = stream_info.find('./metadata')
 								                    format_info = {
 								                        'format': video_format,
 								                        'height': int_or_none(xpath_text(metadata, './height')),
 								                        'width': int_or_none(xpath_text(metadata, './width')),
 								                    }
 								                    if '.fplive.net/' in video_url:
 								                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
 								                        parsed_video_url = compat_urlparse.urlparse(video_url)
 								                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
 								                            netloc='v.lvlt.crcdn.net',
 								                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
 								                        if self._is_valid_url(direct_video_url, video_id, video_format):
 								                            format_info.update({
 								                                'format_id': 'http-' + video_format,
 								                                'url': direct_video_url,
 								                            })
 								                            formats.append(format_info)
 								                            continue
 								                    format_info.update({
 								                        'format_id': 'rtmp-' + video_format,
 								                        'url': video_url,
 								                        'play_path': video_file,
 								                        'ext': 'flv',
 								                    })
 								                    formats.append(format_info)
-												[crunchyroll] Prefer hardsubless formats and formats in locale language

											
										
										
											2018-09-16 16:54:25 +00:00
+								        self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
-												[crunchyroll] extract old rtmp formats

											
										
										
											2017-11-13 18:15:16 +00:00
 								        metadata = self._call_rpc_api(
 								            'VideoPlayer_GetMediaMetadata', video_id,
 								            note='Downloading media info', data={
-												[crunchyroll] improve extraction

- extract more metadata(series, episode, episode_number)
- reduce duplicate requests for extracting formats
- remove duplicate formats

											
										
										
											2016-04-28 17:42:20 +00:00
+								                'media_id': video_id,
 								            })
-												[crunchyroll] parse vilos media data(closes #17343)

											
										
										
											2018-09-01 07:16:28 +00:00
+								        subtitles = {}
 								        for subtitle in media.get('subtitles', []):
 								            subtitle_url = subtitle.get('url')
 								            if not subtitle_url:
 								                continue
 								            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
 								                'url': subtitle_url,
 								                'ext': subtitle.get('format', 'ass'),
 								            })
 								        if not subtitles:
 								            subtitles = self.extract_subtitles(video_id, webpage)
-												[crunchyroll] Allow to list subtitles (fixes #3805)

											
										
										
											2014-09-25 14:57:38 +00:00
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								        # the webpage provides more accurate data than series_title from the XML
 								        series = self._html_search_regex(
-												[crunchyroll] Relax series and season regex (closes #13659)

											
										
										
											2017-07-16 05:40:45 +00:00
+								            r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
-												[crunchyroll] Extract season name


											
										
										
											2017-03-12 04:18:10 +00:00
+								            webpage, 'series', fatal=False)
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
-												[crunchyroll] Improve extraction failsafeness (closes #17991)

											
										
										
											2018-10-28 15:12:54 +00:00
+								        season = episode = episode_number = duration = thumbnail = None
 								        if isinstance(metadata, etree.Element):
 								            season = xpath_text(metadata, 'series_title')
 								            episode = xpath_text(metadata, 'episode_title')
 								            episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
 								            duration = float_or_none(media_metadata.get('duration'), 1000)
 								            thumbnail = xpath_text(metadata, 'episode_image_url')
 								        if not episode:
 								            episode = media_metadata.get('title')
 								        if not episode_number:
 								            episode_number = int_or_none(media_metadata.get('episode_number'))
 								        if not thumbnail:
 								            thumbnail = media_metadata.get('thumbnail', {}).get('url')
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
 								        season_number = int_or_none(self._search_regex(
-												[crunchyroll] Relax series and season regex (closes #13659)

											
										
										
											2017-07-16 05:40:45 +00:00
+								            r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								            webpage, 'season number', default=None))
-												Add support for crunchyroll.com

											
										
										
											2013-11-04 02:08:17 +00:00
+								        return {
-												PEP8: more applied

											
										
										
											2014-11-23 20:20:46 +00:00
+								            'id': video_id,
 								            'title': video_title,
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								            'description': video_description,
-												[crunchyroll] Improve extraction failsafeness (closes #17991)

											
										
										
											2018-10-28 15:12:54 +00:00
+								            'duration': duration,
 								            'thumbnail': thumbnail,
-												PEP8: more applied

											
										
										
											2014-11-23 20:20:46 +00:00
+								            'uploader': video_uploader,
-												[crunchyroll] Add support for mobile URLs and use unicode literals
											
										
										
											2014-01-29 22:23:44 +00:00
+								            'upload_date': video_upload_date,
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								            'series': series,
-												[crunchyroll] Extract season name


											
										
										
											2017-03-12 04:18:10 +00:00
+								            'season': season,
-												[crunchyroll] Improve series and season metadata extraction (closes #11832)

											
										
										
											2017-01-27 16:55:55 +00:00
+								            'season_number': season_number,
 								            'episode': episode,
 								            'episode_number': episode_number,
-												PEP8: more applied

											
										
										
											2014-11-23 20:20:46 +00:00
+								            'subtitles': subtitles,
 								            'formats': formats,
-												[crunchyroll] Use `enumerate`

											
										
										
											2014-02-25 19:51:51 +00:00
+								        }
-												Added extractor for crunchyroll 'playlists' i.e. series. so that one can, e.g. download all episodes of a series

											
										
										
											2014-10-20 05:47:05 +00:00
-												[crunchyroll] Force Accept-Language to any for all requests (Closes #6797)

											
										
										
											2015-09-08 08:11:20 +00:00
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
    """Extractor for Crunchyroll show (series) pages.

    A show URL is turned into a playlist whose entries are URL results
    pointing at the individual episode pages, each delegated to the
    'Crunchyroll' extractor.
    """

    IE_NAME = 'crunchyroll:playlist'
    # The negative lookahead rejects well-known non-show sections of the site
    # (news, forum, store, login, ...) as well as direct ``media-<id>`` URLs.
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
        'info_dict': {
            'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
        },
        'playlist_count': 13,
    }, {
        # geo-restricted (US), 18+ maturity wall, non-premium available
        'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
        'info_dict': {
            'id': 'cosplay-complex-ova',
            'title': 'Cosplay Complex OVA'
        },
        'playlist_count': 3,
        'skip': 'Georestricted',
    }, {
        # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
        'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist info dict from the show page at ``url``.

        The show page is fetched through ``self._add_skip_wall`` with the
        geo-verification headers, the show title is read from the ``<h1>``
        name span (extraction fails if it is missing), and every
        ``showview_videos_media_<id>`` list item contributes one entry.
        """
        playlist_id = self._match_id(url)

        page = self._download_webpage(
            self._add_skip_wall(url), playlist_id,
            headers=self.geo_verification_headers())

        playlist_title = self._html_search_regex(
            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
            page, 'title')

        # Each match is a (media id, episode href) pair, in page order.
        media_links = re.findall(
            r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
            page)

        # Entries are emitted in the reverse of page order
        # (presumably the page lists the newest episode first; verify).
        playlist_entries = [
            self.url_result(
                'http://www.crunchyroll.com' + href, 'Crunchyroll', media_id)
            for media_id, href in reversed(media_links)
        ]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': playlist_entries,
        }