mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-01-03 14:47:58 +00:00
[niconico] fix playlist extraction (closes #27428)
This commit is contained in:
parent
b8aea53682
commit
f966461476
1 changed file with 71 additions and 26 deletions
|
@ -1,20 +1,23 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import functools
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
InAdvancePagedList,
|
||||||
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
remove_start,
|
remove_start,
|
||||||
|
@ -181,7 +184,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
if urlh is False:
|
if urlh is False:
|
||||||
login_ok = False
|
login_ok = False
|
||||||
else:
|
else:
|
||||||
parts = compat_urlparse.urlparse(urlh.geturl())
|
parts = compat_urllib_parse_urlparse(urlh.geturl())
|
||||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||||
login_ok = False
|
login_ok = False
|
||||||
if not login_ok:
|
if not login_ok:
|
||||||
|
@ -292,7 +295,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||||
video_id, 'Downloading flv info')
|
video_id, 'Downloading flv info')
|
||||||
|
|
||||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
flv_info = compat_parse_qs(flv_info_webpage)
|
||||||
if 'url' not in flv_info:
|
if 'url' not in flv_info:
|
||||||
if 'deleted' in flv_info:
|
if 'deleted' in flv_info:
|
||||||
raise ExtractorError('The video has been deleted.',
|
raise ExtractorError('The video has been deleted.',
|
||||||
|
@ -437,34 +440,76 @@ class NiconicoIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class NiconicoPlaylistIE(InfoExtractor):
|
class NiconicoPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '27411728',
|
'id': '27411728',
|
||||||
'title': 'AKB48のオールナイトニッポン',
|
'title': 'AKB48のオールナイトニッポン',
|
||||||
|
'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
|
||||||
|
'uploader': 'のっく',
|
||||||
|
'uploader_id': '805442',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 225,
|
'playlist_mincount': 225,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
def _call_api(self, list_id, resource, query):
|
||||||
|
return self._download_json(
|
||||||
|
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||||
|
'Downloading %s JSON metatdata' % resource, query=query,
|
||||||
|
headers={'X-Frontend-Id': 6})['data']['mylist']
|
||||||
|
|
||||||
|
def _parse_owner(self, item):
|
||||||
|
owner = item.get('owner') or {}
|
||||||
|
if owner:
|
||||||
|
return {
|
||||||
|
'uploader': owner.get('name'),
|
||||||
|
'uploader_id': owner.get('id'),
|
||||||
}
|
}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _fetch_page(self, list_id, page):
|
||||||
|
page += 1
|
||||||
|
items = self._call_api(list_id, 'page %d' % page, {
|
||||||
|
'page': page,
|
||||||
|
'pageSize': self._PAGE_SIZE,
|
||||||
|
})['items']
|
||||||
|
for item in items:
|
||||||
|
video = item.get('video') or {}
|
||||||
|
video_id = video.get('id')
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
count = video.get('count') or {}
|
||||||
|
get_count = lambda x: int_or_none(count.get(x))
|
||||||
|
info = {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||||
|
'description': video.get('shortDescription'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'view_count': get_count('view'),
|
||||||
|
'comment_count': get_count('comment'),
|
||||||
|
'ie_key': NiconicoIE.ie_key(),
|
||||||
|
}
|
||||||
|
info.update(self._parse_owner(video))
|
||||||
|
yield info
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
list_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, list_id)
|
mylist = self._call_api(list_id, 'list', {
|
||||||
|
'pageSize': 1,
|
||||||
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
})
|
||||||
webpage, 'entries')
|
entries = InAdvancePagedList(
|
||||||
entries = json.loads(entries_json)
|
functools.partial(self._fetch_page, list_id),
|
||||||
entries = [{
|
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||||
'_type': 'url',
|
self._PAGE_SIZE)
|
||||||
'ie_key': NiconicoIE.ie_key(),
|
result = self.playlist_result(
|
||||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||||
entry['item_data']['video_id']),
|
result.update(self._parse_owner(mylist))
|
||||||
} for entry in entries]
|
return result
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
|
|
||||||
'id': list_id,
|
|
||||||
'entries': entries,
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue