youtube-dl/youtube_dl/extractor/ign.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor


class IGNIE(InfoExtractor):
    """
    Extractor for some of the IGN sites, like www.ign.com, es.ign.com and
    de.ign.com. Some videos of it.ign.com are also supported.

    A page either embeds several videos (returned as a playlist of IGN video
    URLs) or a single video, whose metadata is read from the JSON config
    served at ``_CONFIG_URL_TEMPLATE``.
    """

    # The named groups ``type`` and ``name_or_id`` are read by _real_extract.
    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
    IE_NAME = 'ign.com'

    # Filled in with the video id found in the web page (see _get_video_info).
    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
    # Candidate patterns for the description, handed to _html_search_regex.
    # NOTE(review): presumably tried in order by the inherited helper - confirm.
    _DESCRIPTION_RE = [
        r'<span class="page-object-description">(.+?)</span>',
        r'id="my_show_video">.*?<p>(.*?)</p>',
        r'<meta name="description" content="(.*?)"',
    ]

    _TESTS = [
        {
            'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
            'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
            'info_dict': {
                'id': '8f862beef863986b2785559b9e1aa599',
                'ext': 'mp4',
                'title': 'The Last of Us Review',
                'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            }
        },
        {
            'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
            'playlist': [
                {
                    'info_dict': {
                        'id': '5ebbd138523268b93c9141af17bec937',
                        'ext': 'mp4',
                        'title': 'GTA 5 Video Review',
                        'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    },
                },
                {
                    'info_dict': {
                        'id': '638672ee848ae4ff108df2a296418ee2',
                        'ext': 'mp4',
                        'title': '26 Twisted Moments from GTA 5 in Slow Motion',
                        'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
            ],
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
            'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
            'info_dict': {
                'id': '078fdd005f6d3c02f63d795faa1b984f',
                'ext': 'mp4',
                'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
                'description': (
                    'Giant skeletons, bloody hunts, and captivating'
                    ' natural beauty take our breath away.'
                ),
            },
        },
    ]

    def _find_video_id(self, webpage):
        """Search ``webpage`` for the IGN video id and return it.

        Several patterns are supplied because the id is embedded differently
        depending on the page layout (JSON attribute, data attribute, flash
        object id, og:image URL, hero poster element).
        """
        res_id = [
            r'"video_id"\s*:\s*"(.*?)"',
            r'data-video-id="(.+?)"',
            r'<object id="vid_(.+?)"',
            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
        ]
        return self._search_regex(res_id, webpage, 'video id')

    def _real_extract(self, url):
        """Extract the video (or playlist of videos) found at ``url``.

        Returns a playlist dict when the page embeds one or more flash players
        pointing at www.ign.com video pages; otherwise returns the info dict
        built by ``_get_video_info`` with a ``description`` key added.
        """
        mobj = re.match(self._VALID_URL, url)
        name_or_id = mobj.group('name_or_id')
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        # This class's own _VALID_URL never produces the type 'video' (it
        # yields 'videos', 'show_videos', 'articles' or '.../feature'), so the
        # embedded-player scan runs for every IGN page; only subclasses whose
        # URL pattern captures 'video' skip it.
        if page_type != 'video':
            # Raw string: the pattern contains ``\.``, which is not a valid
            # escape sequence in a regular string literal.
            multiple_urls = re.findall(
                r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
                webpage)
            if multiple_urls:
                # Delegate every embedded video URL back to this extractor.
                entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
                return {
                    '_type': 'playlist',
                    'id': name_or_id,
                    'entries': entries,
                }

        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        # DOTALL lets the description patterns span several lines of HTML.
        description = self._html_search_regex(self._DESCRIPTION_RE,
                                              webpage, 'video description', flags=re.DOTALL)
        result['description'] = description
        return result

    def _get_video_info(self, video_id):
        """Download the JSON config for ``video_id`` and build the info dict.

        The returned dict carries ``id``, ``url``, ``title`` and ``thumbnail``,
        all read from the ``playlist.media`` entry of the config.
        """
        config_url = self._CONFIG_URL_TEMPLATE % video_id
        config = self._download_json(config_url, video_id)
        media = config['playlist']['media']

        return {
            'id': media['metadata']['videoId'],
            'url': media['url'],
            'title': media['metadata']['title'],
            # The poster URL contains a '{size}' placeholder; 'grande' is the
            # size variant substituted here.
            'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
        }


class OneUPIE(IGNIE):
    """
    Extractor for gamevideos.1up.com.

    All extraction work is inherited from IGNIE; this class only supplies its
    own URL pattern and description pattern, and reports the id taken from
    the URL instead of the one returned by the parent extractor.
    """

    IE_NAME = '1up.com'
    # ``type`` is always 'video' here, which makes the parent's
    # ``page_type != 'video'`` check false for these URLs.
    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

    _TESTS = [
        {
            'url': 'http://gamevideos.1up.com/video/id/34976.html',
            'md5': '68a54ce4ebc772e4b71e3123d413163d',
            'info_dict': {
                'id': '34976',
                'ext': 'mp4',
                'title': 'Sniper Elite V2 - Trailer',
                'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
            },
        },
    ]

    def _real_extract(self, url):
        """Run the inherited extraction, then replace ``id`` with the URL's id."""
        url_match = re.match(self._VALID_URL, url)
        info = super(OneUPIE, self)._real_extract(url)
        # Overwrite the id from the parent with the ``name_or_id`` URL group.
        info['id'] = url_match.group('name_or_id')
        return info
[ign] Modernize 2014-03-29 13:34:03 +00:00			`from __future__ import unicode_literals`

Add IGNIE Only for www.ign.com, it doesn't support country specific versions (like es.ign.com) 2013-07-11 22:00:21 +00:00			`import re`

			`from .common import InfoExtractor`

[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00
Add IGNIE Only for www.ign.com, it doesn't support country specific versions (like es.ign.com) 2013-07-11 22:00:21 +00:00			`class IGNIE(InfoExtractor):`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`"""`
			`Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.`
			`Some videos of it.ign.com are also supported`
			`"""`

[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`IE_NAME = 'ign.com'`
Add IGNIE Only for www.ign.com, it doesn't support country specific versions (like es.ign.com) 2013-07-11 22:00:21 +00:00
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`_DESCRIPTION_RE = [`
			`r'<span class="page-object-description">(.+?)</span>',`
			`r'id="my_show_video">.*?<p>(.*?)</p>',`
[ign] Fix extraction of video in articles 2014-08-26 13:38:29 +00:00			`r'<meta name="description" content="(.*?)"',`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`]`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`_TESTS = [`
			`{`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',`
			`'md5': 'eac8bdc1890980122c3b66f14bdd02e9',`
			`'info_dict': {`
			`'id': '8f862beef863986b2785559b9e1aa599',`
			`'ext': 'mp4',`
			`'title': 'The Last of Us Review',`
			`'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`}`
			`},`
			`{`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',`
			`'playlist': [`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`{`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`'info_dict': {`
			`'id': '5ebbd138523268b93c9141af17bec937',`
			`'ext': 'mp4',`
			`'title': 'GTA 5 Video Review',`
			`'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`},`
			`},`
			`{`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`'info_dict': {`
			`'id': '638672ee848ae4ff108df2a296418ee2',`
			`'ext': 'mp4',`
			`'title': '26 Twisted Moments from GTA 5 in Slow Motion',`
			`'description': 'The twisted beauty of GTA 5 in stunning slow motion.',`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`},`
			`},`
			`],`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`'params': {`
			`'skip_download': True,`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`},`
			`},`
[ign] Fix extraction of video in articles 2014-08-26 13:38:29 +00:00			`{`
			`'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',`
			`'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',`
			`'info_dict': {`
			`'id': '078fdd005f6d3c02f63d795faa1b984f',`
			`'ext': 'mp4',`
			`'title': 'Rewind Theater - Wild Trailer Gamescom 2014',`
Fix all PEP8 issues except E501 2014-11-23 21:21:46 +00:00			`'description': (`
			`'Giant skeletons, bloody hunts, and captivating'`
			`' natural beauty take our breath away.'`
			`),`
[ign] Fix extraction of video in articles 2014-08-26 13:38:29 +00:00			`},`
			`},`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`]`
Add IGNIE Only for www.ign.com, it doesn't support country specific versions (like es.ign.com) 2013-07-11 22:00:21 +00:00
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`def _find_video_id(self, webpage):`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`res_id = [`
[ign] Fix extraction 2014-09-18 14:57:34 +00:00			`r'"video_id"\s*:\s*"(.*?)"',`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`r'data-video-id="(.+?)"',`
			`r'<object id="vid_(.+?)"',`
			`r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',`
[ign] Fix extraction of video in articles 2014-08-26 13:38:29 +00:00			`r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`]`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`return self._search_regex(res_id, webpage, 'video id')`

Add IGNIE Only for www.ign.com, it doesn't support country specific versions (like es.ign.com) 2013-07-11 22:00:21 +00:00			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`name_or_id = mobj.group('name_or_id')`
[ign] extract videos from articles pages 2013-08-29 19:39:36 +00:00			`page_type = mobj.group('type')`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`webpage = self._download_webpage(url, name_or_id)`
[ign] Fix extraction of video in articles 2014-08-26 13:38:29 +00:00			`if page_type != 'video':`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`multiple_urls = re.findall(`
[ign] Fix extraction 2014-09-18 14:57:34 +00:00			`'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00			`webpage)`
			`if multiple_urls:`
[ign] Return proper playlist object 2014-09-29 03:05:06 +00:00			`entries = [self.url_result(u, ie='IGN') for u in multiple_urls]`
			`return {`
			`'_type': 'playlist',`
			`'id': name_or_id,`
			`'entries': entries,`
			`}`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`video_id = self._find_video_id(webpage)`
			`result = self._get_video_info(video_id)`
			`description = self._html_search_regex(self._DESCRIPTION_RE,`
PEP8: applied even more rules 2014-11-23 20:39:15 +00:00			`webpage, 'video description', flags=re.DOTALL)`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`result['description'] = description`
			`return result`

			`def _get_video_info(self, video_id):`
			`config_url = self._CONFIG_URL_TEMPLATE % video_id`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`config = self._download_json(config_url, video_id)`
Add IGNIE Only for www.ign.com, it doesn't support country specific versions (like es.ign.com) 2013-07-11 22:00:21 +00:00			`media = config['playlist']['media']`

[ign] Modernize 2014-03-29 13:34:03 +00:00			`return {`
			`'id': media['metadata']['videoId'],`
			`'url': media['url'],`
			`'title': media['metadata']['title'],`
			`'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),`
			`}`
Add IGNIE Only for www.ign.com, it doesn't support country specific versions (like es.ign.com) 2013-07-11 22:00:21 +00:00

[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`class OneUPIE(IGNIE):`
[1up.com] Urls end now with '.html' 2014-09-18 15:02:53 +00:00			`_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`IE_NAME = '1up.com'`

			`_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'`

[test_all_urls] Add support for distributed URL matching test definition 2014-04-19 17:41:06 +00:00			`_TESTS = [{`
[1up.com] Urls end now with '.html' 2014-09-18 15:02:53 +00:00			`'url': 'http://gamevideos.1up.com/video/id/34976.html',`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`'md5': '68a54ce4ebc772e4b71e3123d413163d',`
			`'info_dict': {`
			`'id': '34976',`
			`'ext': 'mp4',`
			`'title': 'Sniper Elite V2 - Trailer',`
			`'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`}`
[test_all_urls] Add support for distributed URL matching test definition 2014-04-19 17:41:06 +00:00			`}]`
[ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 18:59:34 +00:00
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`result = super(OneUPIE, self)._real_extract(url)`
[ign] Modernize 2014-03-29 13:34:03 +00:00			`result['id'] = mobj.group('name_or_id')`
[ign]: support some country versions and add an extractor for 1up.com 1up.com uses the gin video system, the extractor is a subclass of IGNIE, it just replaces the video id 2013-07-12 09:39:40 +00:00			`return result`