From bd18824c2a99d6d01b00edfa186b9fd227af255c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 30 Dec 2020 13:43:56 +0100 Subject: [PATCH] [yandexdisk] fix extraction(closes #17861)(closes #27131) --- youtube_dl/extractor/yandexdisk.py | 138 +++++++++++++++++------------ 1 file changed, 81 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py index e8f6ae10f..21f37c192 100644 --- a/youtube_dl/extractor/yandexdisk.py +++ b/youtube_dl/extractor/yandexdisk.py @@ -1,19 +1,40 @@ # coding: utf-8 from __future__ import unicode_literals +import json + from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError from ..utils import ( determine_ext, + ExtractorError, float_or_none, int_or_none, - try_get, - urlencode_postdata, + mimetype2ext, + parse_iso8601, + urljoin, ) class YandexDiskIE(InfoExtractor): - _VALID_URL = r'https?://yadi\.sk/[di]/(?P[^/?#&]+)' + _VALID_URL = r'''(?x)https?:// + (?: + (?:www\.)?yadi\.sk| + disk\.yandex\. + (?: + az| + by| + co(?:m(?:\.(?:am|ge|tr))?|\.il)| + ee| + fr| + k[gz]| + l[tv]| + md| + t[jm]| + u[az]| + ru + ) + )/(?:[di]/|public.*?\bhash=)(?P[^/?#&]+)''' _TESTS = [{ 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', @@ -25,94 +46,97 @@ class YandexDiskIE(InfoExtractor): 'duration': 168.6, 'uploader': 'y.botova', 'uploader_id': '300043621', + 'timestamp': 1421396809, + 'upload_date': '20150116', 'view_count': int, }, }, { 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce', 'only_matching': True, + }, { + 'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - status = self._download_webpage( - 'https://disk.yandex.com/auth/status', video_id, query={ - 'urlOrigin': url, - 'source': 'public', - 'md5': 'false', - }) + try: + resource = self._download_json( + 'https://cloud-api.yandex.net/v1/disk/public/resources', + video_id, query={'public_key': url}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + error_description = self._parse_json( + e.cause.read().decode(), video_id)['description'] + raise ExtractorError(error_description, expected=True) + raise - sk = self._search_regex( - r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P(?:(?!\2).)+)\2', - status, 'sk', group='value') + title = resource['name'] + public_url = resource.get('public_url') + if public_url: + video_id = self._match_id(public_url) - webpage = self._download_webpage(url, video_id) + self._set_cookie('yadi.sk', 'yandexuid', '0') - models = self._parse_json( - self._search_regex( - r']+id=["\']models-client[^>]+>\s*(\[.+?\])\s*