youtube-dl/youtube_dl/extractor/discovery.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    parse_iso8601,
)
from ..compat import compat_str


class DiscoveryIE(InfoExtractor):
    """Extractor for discovery.com and the sibling sites listed in _VALID_URL.

    The page URL is requested again with a ``flat=1`` query parameter and the
    response is parsed as JSON.  Every item of its ``playlist`` list becomes
    one entry (HLS formats are read from the item's ``src`` manifest), and
    the entries are always returned wrapped in a playlist result.
    """

    # Both plain and TLS links are accepted.  The id group is the last path
    # component up to the first '.', '/', '?' or '#'.
    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocity
        )\.com/(?:[^/]+/)*(?P<id>[^./?#]+)'''
    _TESTS = [{
        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
        'info_dict': {
            'id': '20769',
            'ext': 'mp4',
            'title': 'Mission Impossible Outtakes',
            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                            ' each other -- to the point of confusing Jamie\'s dog -- and '
                            'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
                            ' back.'),
            'duration': 156,
            'timestamp': 1302032462,
            'upload_date': '20110405',
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
        }
    }, {
        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
        'info_dict': {
            'id': 'mythbusters-the-simpsons',
            'title': 'MythBusters: The Simpsons',
        },
        'playlist_mincount': 10,
    }, {
        'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
        'info_dict': {
            'id': '78326',
            'ext': 'mp4',
            'title': 'Longfin Eels: Maneaters?',
            'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.',
            'upload_date': '20140725',
            'timestamp': 1406246400,
            'duration': 116,
        },
    }]

    def _real_extract(self, url):
        """Build a playlist result from the page's ``flat=1`` JSON.

        ``url`` is a page URL matching ``_VALID_URL``.  The returned playlist
        uses the display id taken from the URL as its id and
        ``playlist_title`` (falling back to ``video_title``) as its title.

        A ``KeyError`` propagates when the JSON has no ``playlist`` list or
        when an item lacks ``id``, ``src`` or ``title``.
        """
        display_id = self._match_id(url)

        # _VALID_URL is prefix-matched, so the incoming URL may already carry
        # a query string and/or a fragment.  Blindly appending '?flat=1'
        # would then yield '...?a=b?flat=1' or bury the parameter inside the
        # fragment, so drop the fragment and pick the right separator.
        info_url = url.partition('#')[0]
        info_url += ('&' if '?' in info_url else '?') + 'flat=1'
        info = self._download_json(info_url, display_id)

        video_title = info.get('playlist_title') or info.get('video_title')

        entries = []
        # Count from 1 so the progress note shows a human-friendly position.
        for position, video_info in enumerate(info['playlist'], 1):
            video_id = compat_str(video_info['id'])
            formats = self._extract_m3u8_formats(
                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                note='Download m3u8 information for video %d' % position)
            entries.append({
                'id': video_id,
                'formats': formats,
                'title': video_info['title'],
                'description': video_info.get('description'),
                'duration': parse_duration(video_info.get('video_length')),
                'webpage_url': video_info.get('href') or video_info.get('url'),
                'thumbnail': video_info.get('thumbnailURL'),
                'alt_title': video_info.get('secondary_title'),
                'timestamp': parse_iso8601(video_info.get('publishedDate')),
            })

        return self.playlist_result(entries, display_id, video_title)
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`from ..utils import (`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`parse_duration,`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`parse_iso8601,`
			`)`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`from ..compat import compat_str`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00

			`class DiscoveryIE(InfoExtractor):`
[discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com 2015-12-31 16:29:37 +00:00			`_VALID_URL = r'''(?x)http://(?:www\.)?(?:`
			`discovery\|`
			`investigationdiscovery\|`
			`discoverylife\|`
			`animalplanet\|`
			`ahctv\|`
			`destinationamerica\|`
			`sciencechannel\|`
			`tlc\|`
			`velocity`
[discovery] improve _VALID_URL regex 2015-12-31 17:24:49 +00:00			`)\.com/(?:[^/]+/)*(?P<id>[^./?#]+)'''`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`_TESTS = [{`
[discovery] Change default url URL does a redirect from dsc.discovery.com to www.discovery.com This commit fixes the correct URL. 2014-06-24 15:41:53 +00:00			`'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00			`'info_dict': {`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`'id': '20769',`
			`'ext': 'mp4',`
[discovery] Fix extractor 2015-01-08 16:36:56 +00:00			`'title': 'Mission Impossible Outtakes',`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00			`'description': ('Watch Jamie Hyneman and Adam Savage practice being'`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`' each other -- to the point of confusing Jamie\'s dog -- and '`
			`'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'`
			`' back.'),`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00			`'duration': 156,`
[discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com 2015-12-31 16:29:37 +00:00			`'timestamp': 1302032462,`
			`'upload_date': '20110405',`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00			`},`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`'params': {`
			`'skip_download': True, # requires ffmpeg`
			`}`
			`}, {`
			`'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',`
			`'info_dict': {`
			`'id': 'mythbusters-the-simpsons',`
			`'title': 'MythBusters: The Simpsons',`
			`},`
[discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com 2015-12-31 16:29:37 +00:00			`'playlist_mincount': 10,`
			`}, {`
			`'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',`
			`'info_dict': {`
			`'id': '78326',`
			`'ext': 'mp4',`
			`'title': 'Longfin Eels: Maneaters?',`
			`'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.',`
			`'upload_date': '20140725',`
			`'timestamp': 1406246400,`
			`'duration': 116,`
			`},`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`}]`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00
			`def _real_extract(self, url):`
[discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com 2015-12-31 16:29:37 +00:00			`display_id = self._match_id(url)`
			`info = self._download_json(url + '?flat=1', display_id)`
[discovery] Extract more info and simplify 2014-01-27 11:41:30 +00:00
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`video_title = info.get('playlist_title') or info.get('video_title')`
Added support for Discovery Issue #2227 2014-01-26 08:35:34 +00:00
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`entries = [{`
			`'id': compat_str(video_info['id']),`
			`'formats': self._extract_m3u8_formats(`
[discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com 2015-12-31 16:29:37 +00:00			`video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`note='Download m3u8 information for video %d' % (idx + 1)),`
			`'title': video_info['title'],`
			`'description': video_info.get('description'),`
			`'duration': parse_duration(video_info.get('video_length')),`
[discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com 2015-12-31 16:29:37 +00:00			`'webpage_url': video_info.get('href') or video_info.get('url'),`
[discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 2015-06-07 08:34:19 +00:00			`'thumbnail': video_info.get('thumbnailURL'),`
			`'alt_title': video_info.get('secondary_title'),`
			`'timestamp': parse_iso8601(video_info.get('publishedDate')),`
			`} for idx, video_info in enumerate(info['playlist'])]`

[discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com 2015-12-31 16:29:37 +00:00			`return self.playlist_result(entries, display_id, video_title)`