Mirror of https://github.com/ytdl-org/youtube-dl.git
[instagram] Add support for user profiles (Fixes #2606)
commit ea38e55fff (parent 257cfebfe6)
6 changed files with 124 additions and 29 deletions

Summary: this commit adds InstagramUserIE, an extractor that turns an Instagram profile page into a playlist of the user's videos. In support of it, the mandatory-field checks previously inlined in test/test_download.py move into the shared expect_info_dict helper in test/helper.py, and YoutubeDL gains an add_default_extra_info() method that factors out the boilerplate metadata assignment.
test/helper.py
@@ -110,3 +110,21 @@ def expect_info_dict(self, expected_dict, got_dict):
         self.assertEqual(expected, got,
             u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
+    # Check for the presence of mandatory fields
+    for key in ('id', 'url', 'title', 'ext'):
+        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # Check for mandatory fields that are automatically set by YoutubeDL
+    for key in ['webpage_url', 'extractor', 'extractor_key']:
+        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
+
+    # Are checkable fields missing from the test case definition?
+    test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+        for key, value in got_dict.items()
+        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+    if missing_keys:
+        sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+        self.assertFalse(
+            missing_keys,
+            'Missing keys in test definition: %s' % (
+                ', '.join(sorted(missing_keys))))
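A note on the 'md5:' + md5(value) convention above: string fields longer than 250 characters are recorded in test definitions as an MD5 digest rather than verbatim. A minimal sketch of the idea, assuming the md5() helper in test/helper.py hashes the UTF-8 encoding and returns the hex digest:

    import hashlib

    def md5(s):
        # Assumed to mirror the md5() helper in test/helper.py.
        return hashlib.md5(s.encode('utf-8')).hexdigest()

    description = 'x' * 300  # longer than the 250-character cutoff
    stored = description if len(description) < 250 else 'md5:' + md5(description)
    print(stored)  # prints an 'md5:...' marker instead of the 300-character value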
test/test_download.py
@@ -137,25 +137,6 @@ def generator(test_case):
                     info_dict = json.load(infof)
 
                 expect_info_dict(self, tc.get('info_dict', {}), info_dict)
-
-                # Check for the presence of mandatory fields
-                for key in ('id', 'url', 'title', 'ext'):
-                    self.assertTrue(key in info_dict.keys() and info_dict[key])
-                # Check for mandatory fields that are automatically set by YoutubeDL
-                for key in ['webpage_url', 'extractor', 'extractor_key']:
-                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
-                # Are checkable fields missing from the test case definition?
-                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
-                    for key, value in info_dict.items()
-                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
-                missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys())
-                if missing_keys:
-                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
-                    self.assertFalse(
-                        missing_keys,
-                        'Missing keys in test definition: %s' % (
-                            ','.join(sorted(missing_keys))))
         finally:
             try_rm_tcs_files()
 
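For context, generator(test_case) builds one download test per extractor test definition, and expect_info_dict compares the downloaded metadata against the definition's info_dict. A hypothetical definition of the shape the suite consumes (the URL and all field values here are illustrative placeholders, not an actual test case):

    _TEST = {
        'url': 'http://example.com/video/12345',
        'info_dict': {
            'id': '12345',
            'ext': 'mp4',
            'title': 'Example title',
            'uploader_id': 'example_user',
        },
    }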
test/test_playlists.py
@@ -9,8 +9,10 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL
+from test.helper import (
+    expect_info_dict,
+    FakeYDL,
+)
 
 from youtube_dl.extractor import (
     AcademicEarthCourseIE,
@@ -39,6 +41,7 @@ from youtube_dl.extractor import (
     TEDIE,
     ToypicsUserIE,
     XTubeUserIE,
+    InstagramUserIE,
 )
 
 
@@ -287,5 +290,28 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], 'greenshowers')
         self.assertTrue(len(result['entries']) >= 155)
 
+    def test_InstagramUser(self):
+        dl = FakeYDL()
+        ie = InstagramUserIE(dl)
+        result = ie.extract('http://instagram.com/porsche')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'porsche')
+        self.assertTrue(len(result['entries']) >= 2)
+        test_video = next(
+            e for e in result['entries']
+            if e['id'] == '614605558512799803_462752227')
+        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
+        dl.process_video_result(test_video, download=False)
+        EXPECTED = {
+            'id': '614605558512799803_462752227',
+            'ext': 'mp4',
+            'title': '#Porsche Intelligent Performance.',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader': 'Porsche',
+            'uploader_id': 'porsche',
+        }
+        expect_info_dict(self, EXPECTED, test_video)
+
+
 if __name__ == '__main__':
     unittest.main()
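The 'thumbnail': 're:^https?://.*\.jpg' entry in EXPECTED relies on another expect_info_dict convention: expected string values prefixed with 're:' are matched as regular expressions rather than compared for equality. A sketch of the assumed behavior:

    import re

    def check_field(expected, got):
        # Assumed convention: 're:'-prefixed expectations are regular
        # expressions, everything else is compared for equality.
        if isinstance(expected, str) and expected.startswith('re:'):
            assert re.match(expected[len('re:'):], got) is not None
        else:
            assert expected == got

    check_field(r're:^https?://.*\.jpg', 'http://example.com/thumb.jpg')  # passes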
youtube_dl/YoutubeDL.py
@@ -512,13 +512,7 @@ class YoutubeDL(object):
                         '_type': 'compat_list',
                         'entries': ie_result,
                     }
-                self.add_extra_info(ie_result,
-                    {
-                        'extractor': ie.IE_NAME,
-                        'webpage_url': url,
-                        'webpage_url_basename': url_basename(url),
-                        'extractor_key': ie.ie_key(),
-                    })
+                self.add_default_extra_info(ie_result, ie, url)
                 if process:
                     return self.process_ie_result(ie_result, download, extra_info)
                 else:
@@ -537,6 +531,14 @@ class YoutubeDL(object):
         else:
             self.report_error('no suitable InfoExtractor for URL %s' % url)
 
+    def add_default_extra_info(self, ie_result, ie, url):
+        self.add_extra_info(ie_result, {
+            'extractor': ie.IE_NAME,
+            'webpage_url': url,
+            'webpage_url_basename': url_basename(url),
+            'extractor_key': ie.ie_key(),
+        })
+
     def process_ie_result(self, ie_result, download=True, extra_info={}):
         """
         Take the result of the ie(may be modified) and resolve all unresolved
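The new add_default_extra_info() delegates to add_extra_info(), which is not shown in this diff; it is assumed to fill in only those keys the extractor result does not already provide, roughly:

    def add_extra_info(info_dict, extra_info):
        # Assumed behavior of YoutubeDL.add_extra_info: copy keys from
        # extra_info into info_dict without overwriting values the
        # extractor already set.
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    result = {'id': '42', 'extractor': 'instagram:user'}
    add_extra_info(result, {'extractor': 'generic', 'webpage_url': 'http://example.com'})
    assert result['extractor'] == 'instagram:user'  # the existing value wins

Under that assumption, the test above can call dl.add_default_extra_info(test_video, ie, '(irrelevant URL)') safely: fields the extractor already set are left untouched.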
youtube_dl/extractor/__init__.py
@@ -112,7 +112,7 @@ from .imdb import (
 )
 from .ina import InaIE
 from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .ivi import (
youtube_dl/extractor/instagram.py
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
 
 
 class InstagramIE(InfoExtractor):
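int_or_none is a small coercion helper from youtube_dl/utils.py, used below for the like count and timestamp. It is assumed to behave roughly as follows (the real helper may accept extra scaling arguments):

    def int_or_none(v):
        # Pass None through so missing JSON fields do not raise,
        # otherwise coerce the value to int.
        return int(v) if v is not None else None

    assert int_or_none('17') == 17
    assert int_or_none(None) is None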
@@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
             'uploader_id': uploader_id,
             'description': desc,
         }
+
+
+class InstagramUserIE(InfoExtractor):
+    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    IE_DESC = 'Instagram user profile'
+    IE_NAME = 'instagram:user'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uploader_id = mobj.group('username')
+
+        entries = []
+        page_count = 0
+        media_url = 'http://instagram.com/%s/media' % uploader_id
+        while True:
+            page = self._download_json(
+                media_url, uploader_id,
+                note='Downloading page %d ' % (page_count + 1),
+            )
+            page_count += 1
+
+            for it in page['items']:
+                if it.get('type') != 'video':
+                    continue
+                like_count = int_or_none(it.get('likes', {}).get('count'))
+                user = it.get('user', {})
+
+                formats = [{
+                    'format_id': k,
+                    'height': v.get('height'),
+                    'width': v.get('width'),
+                    'url': v['url'],
+                } for k, v in it['videos'].items()]
+                self._sort_formats(formats)
+
+                thumbnails_el = it.get('images', {})
+                thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
+
+                title = it.get('caption', {}).get('text', it['id'])
+
+                entries.append({
+                    'id': it['id'],
+                    'title': title,
+                    'formats': formats,
+                    'thumbnail': thumbnail,
+                    'webpage_url': it.get('link'),
+                    'uploader': user.get('full_name'),
+                    'uploader_id': user.get('username'),
+                    'like_count': like_count,
+                    'upload_timestamp': int_or_none(it.get('created_time')),
+                })
+
+            if not page['items']:
+                break
+            max_id = page['items'][-1]['id']
+            media_url = (
+                'http://instagram.com/%s/media?max_id=%s' % (
+                    uploader_id, max_id))
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': uploader_id,
+            'title': uploader_id,
+        }
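The extractor pages through the profile's /media endpoint, passing the id of the last item as max_id until an empty items list comes back. End to end, it can be exercised through the public API; a minimal sketch (YoutubeDL, extract_info and the 'quiet' option are the standard public interface, and the profile URL is the one used in the test above):

    import youtube_dl

    # List a profile's videos without downloading them.
    ydl = youtube_dl.YoutubeDL({'quiet': True})
    info = ydl.extract_info('http://instagram.com/porsche', download=False)
    assert info['_type'] == 'playlist'
    for entry in info['entries']:
        print(entry['id'], entry.get('title'))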