mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-29 05:32:00 +00:00
[instagram] Improve extraction (closes #22880)
This commit is contained in:
parent
58e55198c1
commit
f22b5a6b96
1 changed file with 65 additions and 63 deletions
|
@@ -122,9 +122,9 @@ class InstagramIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
(video_url, description, thumbnail, timestamp, uploader,
|
(media, video_url, description, thumbnail, timestamp, uploader,
|
||||||
uploader_id, like_count, comment_count, comments, height,
|
uploader_id, like_count, comment_count, comments, height,
|
||||||
width) = [None] * 11
|
width) = [None] * 12
|
||||||
|
|
||||||
shared_data = self._parse_json(
|
shared_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
|
@@ -137,15 +137,17 @@ class InstagramIE(InfoExtractor):
|
||||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||||
dict)
|
dict)
|
||||||
|
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||||
if not media:
|
if not media:
|
||||||
additional_data = self._parse_json(
|
additional_data = self._parse_json(
|
||||||
self._search_regex(r'window\.__additionalDataLoaded\(\'[^\']+\',\s*({.+?})\);',
|
self._search_regex(
|
||||||
|
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
|
||||||
webpage, 'additional data', default='{}'),
|
webpage, 'additional data', default='{}'),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False)
|
||||||
if additional_data:
|
if additional_data:
|
||||||
media = try_get(
|
media = try_get(
|
||||||
additional_data,
|
additional_data, lambda x: x['graphql']['shortcode_media'],
|
||||||
lambda x: x['graphql']['shortcode_media'],
|
|
||||||
dict)
|
dict)
|
||||||
if media:
|
if media:
|
||||||
video_url = media.get('video_url')
|
video_url = media.get('video_url')
|
||||||
|
|
Loading…
Reference in a new issue