mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-22 02:01:50 +00:00
[mediaset] Improve embed support (closes #17668)
This commit is contained in:
parent
60ce0c67fd
commit
8fd12a0831
2 changed files with 33 additions and 7 deletions
|
@@ -3023,7 +3023,7 @@ class GenericIE(InfoExtractor):
|
||||||
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
||||||
|
|
||||||
# Look for Mediaset embeds
|
# Look for Mediaset embeds
|
||||||
mediaset_urls = MediasetIE._extract_urls(webpage)
|
mediaset_urls = MediasetIE._extract_urls(self, webpage)
|
||||||
if mediaset_urls:
|
if mediaset_urls:
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
||||||
|
|
|
@@ -4,6 +4,11 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformBaseIE
|
from .theplatform import ThePlatformBaseIE
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(ie, webpage):
|
||||||
return [
|
def _qs(url):
|
||||||
mobj.group('url')
|
return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
for mobj in re.finditer(
|
|
||||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
|
def _program_guid(qs):
|
||||||
webpage)]
|
return qs.get('programGuid', [None])[0]
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
|
||||||
|
webpage):
|
||||||
|
embed_url = mobj.group('url')
|
||||||
|
embed_qs = _qs(embed_url)
|
||||||
|
program_guid = _program_guid(embed_qs)
|
||||||
|
if program_guid:
|
||||||
|
entries.append(embed_url)
|
||||||
|
continue
|
||||||
|
video_id = embed_qs.get('id', [None])[0]
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
urlh = ie._request_webpage(
|
||||||
|
embed_url, video_id, note='Following embed URL redirect')
|
||||||
|
embed_url = compat_str(urlh.geturl())
|
||||||
|
program_guid = _program_guid(_qs(embed_url))
|
||||||
|
if program_guid:
|
||||||
|
entries.append(embed_url)
|
||||||
|
return entries
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
guid = self._match_id(url)
|
guid = self._match_id(url)
|
||||||
|
|
Loading…
Reference in a new issue