1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-25 03:32:05 +00:00

[raiplay:playlist] Fix issues and improve (closes #14563)

This commit is contained in:
Sergey M․ 2017-12-09 00:46:28 +07:00
parent d21d0ba6c1
commit 1115271ac6
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 37 additions and 27 deletions

View file

@ -856,8 +856,8 @@ from .radiofrance import RadioFranceIE
from .rai import (
RaiPlayIE,
RaiPlayLiveIE,
RaiPlayPlaylistIE,
RaiIE,
RaiPlaylistIE,
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE

View file

@ -17,6 +17,7 @@ from ..utils import (
parse_duration,
strip_or_none,
try_get,
unescapeHTML,
unified_strdate,
unified_timestamp,
update_url_query,
@ -249,6 +250,41 @@ class RaiPlayLiveIE(RaiBaseIE):
}
class RaiPlayPlaylistIE(InfoExtractor):
    """Extract a raiplay.it programme page as a playlist of RaiPlay videos."""

    # The playlist id is the path segment right after /programmi/.
    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
            'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Build a playlist result from the programme page at *url*.

        The title and description are read from the page's meta tags and
        every anchor pointing at a /raiplay/video/ path becomes one entry
        delegated to RaiPlayIE.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._html_search_meta(
            ('programma', 'nomeProgramma'), webpage, 'title')
        description = unescapeHTML(self._html_search_meta(
            ('description', 'og:description'), webpage, 'description'))

        entries = []
        # The href value may be single- or double-quoted; the backreference
        # \1 makes the non-greedy path stop at the matching closing quote.
        for mobj in re.finditer(
                r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
                webpage):
            # Paths are site-relative, so resolve them against the page URL.
            video_url = urljoin(url, mobj.group('path'))
            entries.append(self.url_result(
                video_url, ie=RaiPlayIE.ie_key(),
                video_id=RaiPlayIE._match_id(video_url)))

        return self.playlist_result(entries, playlist_id, title, description)
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{
@ -455,29 +491,3 @@ class RaiIE(RaiBaseIE):
info.update(relinker_info)
return info
class RaiPlaylistIE(InfoExtractor):
    """Extract a raiplay.it programme page as a playlist of RaiPlay videos."""

    # The playlist id is the path segment right after /programmi/.
    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Build a playlist result from the programme page at *url*.

        Every double-quoted href starting with /raiplay/video that RaiPlayIE
        accepts becomes one playlist entry.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # default=None: a missing "programma" meta tag yields no title
        # rather than an error.
        title = self._html_search_meta('programma', webpage, default=None)

        entries = []
        # Stop the capture at the href's own closing quote. A greedy ".+"
        # would run to the last '"' on the line and swallow any attributes
        # that follow the href, producing a malformed URL.
        for path in re.findall(r' href="(/raiplay/video[^"\n]+)"', webpage):
            # Paths are site-relative, so resolve them against the page URL.
            video_url = urljoin(url, path)
            if RaiPlayIE.suitable(video_url):
                entries.append(
                    self.url_result(video_url, RaiPlayIE.ie_key()))

        return self.playlist_result(entries, playlist_id, title)