1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-29 13:42:37 +00:00

[ign] fix extraction (closes #24771)

This commit is contained in:
Remita Amine 2021-02-08 15:56:42 +01:00
parent 311ebdd9a5
commit 7f8b8bc418
2 changed files with 200 additions and 175 deletions

View file

@ -470,8 +470,8 @@ from .hungama import (
from .hypem import HypemIE from .hypem import HypemIE
from .ign import ( from .ign import (
IGNIE, IGNIE,
OneUPIE, IGNVideoIE,
PCMagIE, IGNArticleIE,
) )
from .iheart import ( from .iheart import (
IHeartRadioIE, IHeartRadioIE,

View file

@ -3,28 +3,39 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
HEADRequest,
determine_ext,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
strip_or_none,
try_get,
) )
class IGNIE(InfoExtractor): class IGNBaseIE(InfoExtractor):
def _call_api(self, slug):
    """Download the IGN API JSON document for the page named by *slug*.

    The endpoint is built from the class-level ``_PAGE_TYPE`` (used twice
    in the path, the second time in plural form) and the slug. The slug
    is also passed to ``_download_json`` as its second argument.
    """
    api_url = 'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(
        self._PAGE_TYPE, slug)
    return self._download_json(api_url, slug)
class IGNIE(IGNBaseIE):
""" """
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com. Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
Some videos of it.ign.com are also supported Some videos of it.ign.com are also supported
""" """
_VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)' _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
IE_NAME = 'ign.com' IE_NAME = 'ign.com'
_PAGE_TYPE = 'video'
_API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s' _TESTS = [{
_EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
_TESTS = [
{
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
'md5': 'febda82c4bafecd2d44b6e1a18a595f8', 'md5': 'd2e1586d9987d40fad7867bf96a018ea',
'info_dict': { 'info_dict': {
'id': '8f862beef863986b2785559b9e1aa599', 'id': '8f862beef863986b2785559b9e1aa599',
'ext': 'mp4', 'ext': 'mp4',
@ -32,13 +43,147 @@ class IGNIE(InfoExtractor):
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c', 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
'timestamp': 1370440800, 'timestamp': 1370440800,
'upload_date': '20130605', 'upload_date': '20130605',
'uploader_id': 'cberidon@ign.com', 'tags': 'count:9',
} }
}, }, {
{ 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
'info_dict': {
'id': 'ee10d774b508c9b8ec07e763b9125b91',
'ext': 'mp4',
'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
'description': 'md5:817a20299de610bd56f13175386da6fa',
'timestamp': 1420571160,
'upload_date': '20150106',
'tags': 'count:4',
}
}, {
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
'only_matching': True,
}]
def _real_extract(self, url):
    """Extract one IGN video by looking its URL slug up in the API.

    Formats are gathered from four places in the API response: the HLS
    manifest (``refs.m3uUrl``), the HDS manifest (``refs.f4mUrl``), the
    individual ``assets`` entries and, when present, the
    ``system.mezzanineUrl`` file. ``videoId`` and ``metadata`` are
    required keys; a title must be available under ``longTitle``,
    ``title`` or ``name``.
    """
    display_id = self._match_id(url)
    video = self._call_api(display_id)

    video_id = video['videoId']
    metadata = video['metadata']
    title = metadata.get('longTitle') or metadata.get('title') or metadata['name']

    formats = []
    manifest_refs = video.get('refs') or {}

    hls_url = manifest_refs.get('m3uUrl')
    if hls_url:
        formats.extend(self._extract_m3u8_formats(
            hls_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    hds_url = manifest_refs.get('f4mUrl')
    if hds_url:
        formats.extend(self._extract_f4m_formats(
            hds_url, video_id, f4m_id='hds', fatal=False))

    for asset in (video.get('assets') or []):
        asset_url = asset.get('url')
        if asset_url:
            formats.append({
                'url': asset_url,
                # NOTE(review): the second argument is presumably a scale
                # divisor (bps -> kbps); confirm against int_or_none.
                'tbr': int_or_none(asset.get('bitrate'), 1000),
                'fps': int_or_none(asset.get('frame_rate')),
                'height': int_or_none(asset.get('height')),
                'width': int_or_none(asset.get('width')),
            })

    mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
    if mezzanine_url:
        formats.append({
            'ext': determine_ext(mezzanine_url, 'mp4'),
            'format_id': 'mezzanine',
            # Explicit preference so this entry is ranked ahead during
            # sorting.
            'preference': 1,
            'url': mezzanine_url,
        })

    self._sort_formats(formats)

    # Keep only thumbnail entries that actually carry a URL.
    thumbnail_urls = (
        thumbnail.get('url') for thumbnail in (video.get('thumbnails') or []))
    thumbnails = [{'url': thumb_url} for thumb_url in thumbnail_urls if thumb_url]

    # Tags are reported by their display name; nameless tags are dropped.
    tag_names = (tag.get('displayName') for tag in (video.get('tags') or []))
    tags = [tag_name for tag_name in tag_names if tag_name]

    return {
        'id': video_id,
        'title': title,
        'description': strip_or_none(metadata.get('description')),
        'timestamp': parse_iso8601(metadata.get('publishDate')),
        'duration': int_or_none(metadata.get('duration')),
        'display_id': display_id,
        'thumbnails': thumbnails,
        'formats': formats,
        'tags': tags,
    }
class IGNVideoIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
_TESTS = [{
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
'info_dict': {
'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
'ext': 'mp4',
'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
'timestamp': 1444665600,
'upload_date': '20151012',
}
}, {
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
}, {
# Youtube embed
'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
'only_matching': True,
}, {
# Twitter embed
'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
'only_matching': True,
}, {
# Vimeo embed
'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
'only_matching': True,
}]
def _real_extract(self, url):
    """Resolve a regional IGN video/trailer page through its embed URL.

    The last path segment of *url* is replaced with ``embed`` and a HEAD
    request is issued. The URL that request finally lands on is then
    inspected: if it carries a ``url`` query parameter, that value is
    handed to the IGN extractor; otherwise the final URL itself is
    returned for generic dispatch.
    """
    video_id = self._match_id(url)

    embed_page = url.rsplit('/', 1)[0] + '/embed'
    final_url = self._request_webpage(HEADRequest(embed_page), video_id).geturl()

    query = compat_parse_qs(compat_urllib_parse_urlparse(final_url).query)
    ign_url = query.get('url', [None])[0]
    if not ign_url:
        # Nothing pointing back to ign.com in the query string: hand the
        # final URL to whichever extractor matches it.
        return self.url_result(final_url)
    return self.url_result(ign_url, IGNIE.ie_key())
class IGNArticleIE(IGNBaseIE):
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
_PAGE_TYPE = 'article'
_TESTS = [{
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
'info_dict': { 'info_dict': {
'id': '100-little-things-in-gta-5-that-will-blow-your-mind', 'id': '524497489e4e8ff5848ece34',
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
}, },
'playlist': [ 'playlist': [
{ {
@ -49,7 +194,6 @@ class IGNIE(InfoExtractor):
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', 'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880, 'timestamp': 1379339880,
'upload_date': '20130916', 'upload_date': '20130916',
'uploader_id': 'danieljkrupa@gmail.com',
}, },
}, },
{ {
@ -60,173 +204,54 @@ class IGNIE(InfoExtractor):
'description': 'The twisted beauty of GTA 5 in stunning slow motion.', 'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820, 'timestamp': 1386878820,
'upload_date': '20131212', 'upload_date': '20131212',
'uploader_id': 'togilvie@ign.com',
}, },
}, },
], ],
'params': { 'params': {
'playlist_items': '2-3',
'skip_download': True, 'skip_download': True,
}, },
}, }, {
{
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch', 'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
'md5': '618fedb9c901fd086f6f093564ef8558',
'info_dict': { 'info_dict': {
'id': '078fdd005f6d3c02f63d795faa1b984f', 'id': '53ee806780a81ec46e0790f8',
'ext': 'mp4',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014', 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
'timestamp': 1408047180,
'upload_date': '20140814',
'uploader_id': 'jamesduggan1990@gmail.com',
}, },
}, 'playlist_count': 2,
{ }, {
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
'only_matching': True,
},
{
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
},
{
# videoId pattern # videoId pattern
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned', 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
'only_matching': True, 'only_matching': True,
},
]
def _find_video_id(self, webpage):
    """Search *webpage* for a video id using the known markup patterns.

    The patterns are passed to ``_search_regex`` as one list together
    with ``default=None``; the caller treats a falsy result as
    "no id found".
    """
    id_patterns = [
        r'"video_id"\s*:\s*"(.*?)"',
        r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
        r'data-video-id="(.+?)"',
        r'<object id="vid_(.+?)"',
        r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
        r'videoId&quot;\s*:\s*&quot;(.+?)&quot;',
        r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
    ]
    return self._search_regex(
        id_patterns, webpage, 'video id', default=None)
def _real_extract(self, url):
    """Extract a video, or a playlist of videos, from an IGN page.

    For every page type other than ``'video'`` the page is first scanned
    for flash-player parameters that reference ign.com video URLs; if any
    are found they are returned as a playlist. Otherwise a video id is
    looked up in the page and resolved through the API, falling back to
    the URL matched by ``_EMBED_RE`` when no id is present.
    """
    match = re.match(self._VALID_URL, url)
    name_or_id, page_type = match.group('name_or_id'), match.group('type')
    webpage = self._download_webpage(url, name_or_id)

    if page_type != 'video':
        embedded_urls = re.findall(
            r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
            webpage)
        if embedded_urls:
            return {
                '_type': 'playlist',
                'id': name_or_id,
                'entries': [
                    self.url_result(embedded_url, ie='IGN')
                    for embedded_url in embedded_urls],
            }

    video_id = self._find_video_id(webpage)
    if video_id:
        return self._get_video_info(video_id)

    # No id in the page: defer to whatever the embed iframe points at.
    embed_url = self._search_regex(self._EMBED_RE, webpage, 'embed url')
    return self.url_result(embed_url)
def _get_video_info(self, video_id):
    """Build the info dict for *video_id* from the IGN video API.

    The API document is fetched from ``_API_URL_TEMPLATE % video_id``.
    Formats come from the HLS manifest (``refs.m3uUrl``), the HDS manifest
    (``refs.f4mUrl``) and every entry of ``assets``. ``refs``, ``assets``
    and ``metadata`` are required keys of the response; a missing one
    raises ``KeyError``.
    """
    api_data = self._download_json(
        self._API_URL_TEMPLATE % video_id, video_id)

    formats = []

    m3u8_url = api_data['refs'].get('m3uUrl')
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    f4m_url = api_data['refs'].get('f4mUrl')
    if f4m_url:
        formats.extend(self._extract_f4m_formats(
            f4m_url, video_id, f4m_id='hds', fatal=False))

    for asset in api_data['assets']:
        formats.append({
            'url': asset['url'],
            'tbr': asset.get('actual_bitrate_kbps'),
            'fps': asset.get('frame_rate'),
            'height': int_or_none(asset.get('height')),
            'width': int_or_none(asset.get('width')),
        })

    self._sort_formats(formats)

    thumbnails = [{
        'url': thumbnail['url']
    } for thumbnail in api_data.get('thumbnails', [])]

    metadata = api_data['metadata']

    return {
        'id': api_data.get('videoId') or video_id,
        # The last fallback previously read ``metadata.get['title']``,
        # which subscripts the bound method and raises TypeError whenever
        # neither ``longTitle`` nor ``name`` is set. It must be a call.
        'title': metadata.get('longTitle') or metadata.get('name') or metadata.get('title'),
        'description': metadata.get('description'),
        'timestamp': parse_iso8601(metadata.get('publishDate')),
        'duration': int_or_none(metadata.get('duration')),
        'display_id': metadata.get('slug') or video_id,
        'uploader_id': metadata.get('creator'),
        'thumbnails': thumbnails,
        'formats': formats,
    }
class OneUPIE(IGNIE):
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
IE_NAME = '1up.com'
_TESTS = [{
'url': 'http://gamevideos.1up.com/video/id/34976.html',
'md5': 'c9cc69e07acb675c31a16719f909e347',
'info_dict': {
'id': '34976',
'ext': 'mp4',
'title': 'Sniper Elite V2 - Trailer',
'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
'timestamp': 1313099220,
'upload_date': '20110811',
'uploader_id': 'IGN',
}
}]
def _real_extract(self, url):
    """Run the parent extraction, then report the id taken from the URL.

    The result of the inherited ``_real_extract`` is kept as is except
    for ``id``, which is overwritten with the ``name_or_id`` group
    matched by ``_VALID_URL``.
    """
    info = super(OneUPIE, self)._real_extract(url)
    info['id'] = re.match(self._VALID_URL, url).group('name_or_id')
    return info
class PCMagIE(IGNIE):
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = 'pcmag'
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
_TESTS = [{
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
'md5': '212d6154fd0361a2781075f1febbe9ad',
'info_dict': {
'id': 'ee10d774b508c9b8ec07e763b9125b91',
'ext': 'mp4',
'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
'timestamp': 1420571160,
'upload_date': '20150106',
'uploader_id': 'cozzipix@gmail.com',
}
}, { }, {
'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp', # Youtube embed
'md5': '94130c1ca07ba0adb6088350681f16c1', 'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
'info_dict': { 'only_matching': True,
'id': '042e560ba94823d43afcb12ddf7142ca', }, {
'ext': 'mp4', # IMDB embed
'title': 'HTC\'s Weird New Re Camera - What\'s New Now', 'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
'description': 'md5:53433c45df96d2ea5d0fda18be2ca908', 'only_matching': True,
'timestamp': 1412953920, }, {
'upload_date': '20141010', # Facebook embed
'uploader_id': 'chris_snyder@pcmag.com', 'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
} 'only_matching': True,
}, {
# Brightcove embed
'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
'only_matching': True,
}] }]
def _real_extract(self, url):
    """Return a playlist of every video referenced by an IGN article.

    The article is fetched through the API by its slug. Entries are
    produced lazily: first the URL of the first ``mediaRelations`` item
    (when present), then every ``ignvideo``/``youtube`` shortcode value
    and every ``<iframe src>`` found in the article's ``content`` chunks.
    The playlist is identified by ``articleId`` and titled with the
    stripped ``metadata.headline``.
    """
    display_id = self._match_id(url)
    article = self._call_api(display_id)

    def entries():
        # Lead media attached to the article is routed to the IGN
        # extractor explicitly.
        lead_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
        if lead_url:
            yield self.url_result(lead_url, IGNIE.ie_key())

        # In-body embeds: shortcodes and iframes, dispatched generically.
        embed_re = r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"'
        for chunk in (article.get('content') or []):
            for embed in re.finditer(embed_re, chunk):
                yield self.url_result(embed.group(1))

    playlist_id = article.get('articleId')
    headline = strip_or_none(try_get(article, lambda x: x['metadata']['headline']))
    return self.playlist_result(entries(), playlist_id, headline)