mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-29 05:32:00 +00:00
[wistia] Add extractor
This commit is contained in:
parent
72135030d1
commit
ef4fd84857
4 changed files with 80 additions and 10 deletions
|
@ -488,7 +488,8 @@ class YoutubeDL(object):
|
||||||
new_result = ie_result.copy()
|
new_result = ie_result.copy()
|
||||||
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||||
'entries', 'urlhandle', 'ie_key', 'duration',
|
'entries', 'urlhandle', 'ie_key', 'duration',
|
||||||
'subtitles', 'annotations', 'format'):
|
'subtitles', 'annotations', 'format',
|
||||||
|
'thumbnail', 'thumbnails'):
|
||||||
if f in new_result:
|
if f in new_result:
|
||||||
del new_result[f]
|
del new_result[f]
|
||||||
if f in embedded_info:
|
if f in embedded_info:
|
||||||
|
|
|
@ -178,6 +178,7 @@ from .wat import WatIE
|
||||||
from .websurg import WeBSurgIE
|
from .websurg import WeBSurgIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
|
from .wistia import WistiaIE
|
||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
from .xhamster import XHamsterIE
|
from .xhamster import XHamsterIE
|
||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
|
|
|
@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
|
||||||
# Site Name | Video Title
|
# Site Name | Video Title
|
||||||
# Video Title - Tagline | Site Name
|
# Video Title - Tagline | Site Name
|
||||||
# and so on and so forth; it's just not practical
|
# and so on and so forth; it's just not practical
|
||||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
video_title = self._html_search_regex(
|
||||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
r'(?s)<title>(.*?)</title>', webpage, u'video title',
|
||||||
|
default=u'video')
|
||||||
|
|
||||||
|
# video uploader is domain name
|
||||||
|
video_uploader = self._search_regex(
|
||||||
|
r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
|
||||||
|
|
||||||
# Look for BrightCove:
|
# Look for BrightCove:
|
||||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||||
|
@ -188,7 +193,7 @@ class GenericIE(InfoExtractor):
|
||||||
|
|
||||||
# Look for embedded YouTube player
|
# Look for embedded YouTube player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||||
for tuppl in matches]
|
for tuppl in matches]
|
||||||
|
@ -197,13 +202,26 @@ class GenericIE(InfoExtractor):
|
||||||
|
|
||||||
# Look for embedded Dailymotion player
|
# Look for embedded Dailymotion player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
|
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
|
||||||
for tuppl in matches]
|
for tuppl in matches]
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
|
# Look for embedded Wistia player
|
||||||
|
match = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
||||||
|
if match:
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': unescapeHTML(match.group('url')),
|
||||||
|
'ie_key': 'Wistia',
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'title': video_title,
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
# Look for Bandcamp pages with custom domain
|
# Look for Bandcamp pages with custom domain
|
||||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
|
@ -247,14 +265,9 @@ class GenericIE(InfoExtractor):
|
||||||
# here's a fun little line of code for you:
|
# here's a fun little line of code for you:
|
||||||
video_id = os.path.splitext(video_id)[0]
|
video_id = os.path.splitext(video_id)[0]
|
||||||
|
|
||||||
# video uploader is domain name
|
|
||||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
|
||||||
url, u'video uploader')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': None,
|
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
}
|
}
|
||||||
|
|
55
youtube_dl/extractor/wistia.py
Normal file
55
youtube_dl/extractor/wistia.py
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class WistiaIE(InfoExtractor):
    """Extractor for Wistia iframe embeds (``wistia.net/embed/iframe/<id>``).

    The embed page passes a JSON blob to ``Wistia.iframeInit(...)``; that blob
    carries the media name and an ``assets`` mapping from which the formats
    and thumbnails are built.
    """

    _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'

    _TEST = {
        u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
        u"file": u"sh7fpupwlt.mov",
        u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
        u"info_dict": {
            u"title": u"cfh_resourceful_zdkh_final_1"
        },
    }

    def _real_extract(self, url):
        """Download the embed page for *url* and build the info dictionary.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnails``.  ``formats`` is ordered by ascending file size.

        Raises ``KeyError`` when the page data lacks ``assets``/``name`` or
        when an asset has no ``url``; those fields are required.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The dot is escaped so that only a literal "Wistia.iframeInit(" call
        # matches.
        # NOTE(review): the match is raw JSON taken from the page; confirm that
        # _html_search_regex (rather than _search_regex) is the intended
        # helper for it.
        data_json = self._html_search_regex(
            r'Wistia\.iframeInit\((.*?), {}\);', webpage, u'video data')

        data = json.loads(data_json)

        formats = []
        thumbnails = []
        for atype, a in data['assets'].items():
            if atype == 'still':
                thumbnail = {'url': a['url']}
                width = a.get('width')
                height = a.get('height')
                # Only advertise a resolution when both dimensions are known.
                if width is not None and height is not None:
                    thumbnail['resolution'] = '%dx%d' % (width, height)
                thumbnails.append(thumbnail)
                continue
            if atype == 'preview':
                # Preview assets are neither listed as formats nor as
                # thumbnails.
                continue
            # Everything except the URL is optional metadata: a missing field
            # becomes None instead of aborting the whole extraction.
            formats.append({
                'format_id': atype,
                'url': a['url'],
                'width': a.get('width'),
                'height': a.get('height'),
                'filesize': a.get('size'),
                'ext': a.get('ext'),
            })
        # Ascending by size; an unknown size counts as 0 because None cannot
        # be ordered against integers on Python 3.
        # NOTE(review): presumably the largest asset is meant to end up last
        # as the preferred format -- confirm against the format selection.
        formats.sort(key=lambda f: f['filesize'] or 0)

        return {
            'id': video_id,
            'title': data['name'],
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
Loading…
Reference in a new issue