Mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2024-12-23 01:08:10 +00:00)

Commit dcca581967: Merge remote-tracking branch 'origin/master'

Conflicts:
    youtube_dl/YoutubeDL.py

13 changed files with 246 additions and 34 deletions
@@ -38,6 +38,7 @@ from youtube_dl.utils import (
     parse_iso8601,
     read_batch_urls,
     sanitize_filename,
+    sanitize_path,
     shell_quote,
     smuggle_url,
     str_to_int,
@@ -131,6 +132,37 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
         self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')

+    def test_sanitize_path(self):
+        if sys.platform != 'win32':
+            return
+
+        self.assertEqual(sanitize_path('abc'), 'abc')
+        self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+        self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+        self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+        self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+        self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+        self.assertEqual(
+            sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+        self.assertEqual(
+            sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+        self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+        self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+        self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
@@ -61,6 +61,7 @@ from .utils import (
     render_table,
     SameFileError,
     sanitize_filename,
+    sanitize_path,
     std_headers,
     subtitles_filename,
     takewhile_inclusive,
@@ -562,7 +563,7 @@ class YoutubeDL(object):
                                  if v is not None)
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)

-            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
             tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             # Temporary fix for #4787
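The effect of wrapping the output template in sanitize_path is easiest to see with the template exercised by the new test_sanitize_path test; a minimal sketch, assuming a checkout that includes this commit (on non-Windows platforms sanitize_path returns its argument unchanged):

from youtube_dl.utils import sanitize_path

# On win32 the '/' separators in the template become '\\', so the rendered
# filename later maps onto real subdirectories instead of keeping raw '/'.
outtmpl = 'youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'
print(sanitize_path(outtmpl))
# expected on win32 (per test_sanitize_path):
# youtube\%(uploader)s\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s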
@@ -1261,7 +1262,7 @@ class YoutubeDL(object):
             return

         try:
-            dn = os.path.dirname(encodeFilename(filename))
+            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
             if dn and not os.path.exists(dn):
                 os.makedirs(dn)
         except (OSError, IOError) as err:
@@ -281,7 +281,7 @@ class F4mFD(FileDownloader):
             boot_info = self._get_bootstrap_from_url(bootstrap_url)
         else:
             bootstrap_url = None
-            bootstrap = base64.b64decode(node.text)
+            bootstrap = base64.b64decode(node.text.encode('ascii'))
         boot_info = read_bootstrap_info(bootstrap)
         return (boot_info, bootstrap_url)

@@ -308,7 +308,7 @@ class F4mFD(FileDownloader):
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
-            metadata = base64.b64decode(metadata_node.text)
+            metadata = base64.b64decode(metadata_node.text.encode('ascii'))
         else:
             metadata = None

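Both b64decode changes in the F4m downloader follow the same pattern: an ElementTree text node is a unicode string, while base64.b64decode expects bytes on the older Python 3 releases youtube-dl still supported, so the text is encoded to ASCII first. A standalone sketch with a hypothetical payload:

import base64

text = 'aGVsbG8gZjRt'  # stand-in for node.text; decodes to b'hello f4m'
bootstrap = base64.b64decode(text.encode('ascii'))  # pass bytes, not str
print(bootstrap)  # b'hello f4m'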
@@ -175,6 +175,7 @@ from .gameone import (
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
 from .giantbomb import GiantBombIE
@@ -363,6 +364,7 @@ from .pbs import PBSIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
@@ -2,13 +2,12 @@
 from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    xpath_text,
     float_or_none,
+    xpath_text,
 )

@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
             'title': 'American Dad - Putting Francine Out of Business',
             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
         },
+    }, {
+        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+        'playlist': [
+            {
+                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+                'info_dict': {
+                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+                    'ext': 'flv',
+                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+            'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+        },
     }]

     @staticmethod
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
             for video in collection.get('videos'):
                 if video.get('slug') == slug:
                     return collection, video
+        return None, None

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
         webpage = self._download_webpage(url, episode_path)

         # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
-        try:
-            bootstrappedData = json.loads(bootstrappedDataJS)
-        except ValueError as ve:
-            errmsg = '%s: Failed to parse JSON ' % episode_path
-            raise ExtractorError(errmsg, cause=ve)
+        bootstrapped_data = self._parse_json(self._search_regex(
+            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)

         # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
         # NOTE: We are only downloading one video (the current one) not the playlist
         if is_playlist:
-            collections = bootstrappedData['playlists']['collections']
+            collections = bootstrapped_data['playlists']['collections']
             collection = self.find_collection_by_linkURL(collections, show_path)
             video_info = self.find_video_info(collection, episode_path)

             show_title = video_info['showTitle']
             segment_ids = [video_info['videoPlaybackID']]
         else:
-            collections = bootstrappedData['show']['collections']
+            collections = bootstrapped_data['show']['collections']
             collection, video_info = self.find_collection_containing_video(collections, episode_path)

-            show = bootstrappedData['show']
+            # Video wasn't found in the collections, let's try `slugged_video`.
+            if video_info is None:
+                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+                    video_info = bootstrapped_data['slugged_video']
+                else:
+                    raise ExtractorError('Unable to find video info')
+
+            show = bootstrapped_data['show']
             show_title = show['title']
             segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]

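The dropped json import and try/except in the AdultSwim extractor are absorbed by the _parse_json helper; a rough sketch of the behaviour that helper provides (not the actual implementation):

import json

from youtube_dl.utils import ExtractorError


def parse_json(json_string, video_id):
    # Parse a JSON blob and surface failures as ExtractorError, roughly what
    # InfoExtractor._parse_json does for the bootstrappedData blob above.
    try:
        return json.loads(json_string)
    except ValueError as ve:
        raise ExtractorError('%s: Failed to parse JSON' % video_id, cause=ve)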
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
             'tbr': media['bitRate'],
             'width': media['width'],
             'height': media['height'],
-        } for media in info['media']]
+        } for media in info['media'] if media.get('mediaPurpose') == 'play']

         if not formats:
             formats.append({
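A quick sketch of the new Break.com filter with hypothetical media entries (key names other than mediaPurpose are illustrative): only entries marked for playback become formats, keeping thumbnails and other assets out of the format list.

info = {'media': [
    {'uri': 'http://example.com/clip.mp4', 'bitRate': 1000,
     'width': 1280, 'height': 720, 'mediaPurpose': 'play'},
    {'uri': 'http://example.com/poster.jpg', 'bitRate': 0,
     'width': 1280, 'height': 720, 'mediaPurpose': 'image'},
]}
playable = [m for m in info['media'] if m.get('mediaPurpose') == 'play']
print(len(playable))  # 1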
youtube_dl/extractor/gazeta.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+    _TESTS = [{
+        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+        'info_dict': {
+            'id': '205566',
+            'ext': 'mp4',
+            'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+            'description': 'md5:38617526050bd17b234728e7f9620a71',
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        display_id = mobj.group('id')
+        embed_url = '%s?p=embed' % mobj.group('url')
+        embed_page = self._download_webpage(
+            embed_url, display_id, 'Downloading embed page')
+
+        video_id = self._search_regex(
+            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+        return self.url_result(
+            'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
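Assuming a youtube-dl checkout that includes this commit, the new extractor can be exercised with the first test URL; GazetaIE only resolves the page to an EaglePlatform URL, which the EaglePlatform extractor then handles:

import youtube_dl

with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info(
        'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
        download=False)
    print(info.get('id'), info.get('ext'))  # '205566', 'mp4' per the test above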
@@ -596,6 +596,19 @@ class GenericIE(InfoExtractor):
                 'view_count': int,
             },
         },
+        # Pladform embed
+        {
+            'url': 'http://muz-tv.ru/kinozal/view/7400/',
+            'info_dict': {
+                'id': '100183293',
+                'ext': 'mp4',
+                'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 694,
+                'age_limit': 0,
+            },
+        },
         # RSS feed with enclosure
         {
             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -1193,6 +1206,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')

+        # Look for Pladform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Pladform')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
youtube_dl/extractor/pladform.py (new file, 90 lines)
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    xpath_text,
+    qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+        https?://
+            (?:
+                (?:
+                    out\.pladform\.ru/player|
+                    static\.pladform\.ru/player\.swf
+                )
+                \?.*\bvideoid=|
+                video\.pladform\.ru/catalog/video/videoid/
+            )
+            (?P<id>\d+)
+        '''
+    _TESTS = [{
+        # http://muz-tv.ru/kinozal/view/7400/
+        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
+        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+        'info_dict': {
+            'id': '100183293',
+            'ext': 'mp4',
+            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 694,
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_xml(
+            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
+            video_id)
+
+        if video.tag == 'error':
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, video.text),
+                expected=True)
+
+        quality = qualities(('ld', 'sd', 'hd'))
+
+        formats = [{
+            'url': src.text,
+            'format_id': src.get('quality'),
+            'quality': quality(src.get('quality')),
+        } for src in video.findall('./src')]
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(
+            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+            video_id)
+
+        title = self._og_search_title(webpage, fatal=False) or xpath_text(
+            video, './/title', 'title', fatal=True)
+        description = self._search_regex(
+            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+            video, './/cover', 'cover')
+
+        duration = int_or_none(xpath_text(video, './/time', 'duration'))
+        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
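To see which URL shapes the new extractor claims, the verbose pattern from PladformIE._VALID_URL can be checked directly against the three test URLs (a standalone sketch; the pattern is copied from the file above):

import re

_VALID_URL = r'''(?x)
    https?://
        (?:
            (?:
                out\.pladform\.ru/player|
                static\.pladform\.ru/player\.swf
            )
            \?.*\bvideoid=|
            video\.pladform\.ru/catalog/video/videoid/
        )
        (?P<id>\d+)
    '''

for url in (
        'http://out.pladform.ru/player?pl=24822&videoid=100183293',
        'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0'):
    print(re.match(_VALID_URL, url).group('id'))  # 100183293 each time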
@@ -53,10 +53,10 @@ class TeamcocoIE(InfoExtractor):
         embed = self._download_webpage(
             embed_url, video_id, 'Downloading embed page')

-        encoded_data = self._search_regex(
-            r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+        player_data = self._parse_json(self._search_regex(
+            r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
         data = self._parse_json(
-            base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
+            base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)

         formats = []
         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
@@ -358,13 +358,12 @@ class TwitchStreamIE(TwitchBaseIE):
             'p': random.randint(1000000, 10000000),
             'player': 'twitchweb',
             'segment_preference': '4',
-            'sig': access_token['sig'],
-            'token': access_token['token'],
+            'sig': access_token['sig'].encode('utf-8'),
+            'token': access_token['token'].encode('utf-8'),
         }
-
         formats = self._extract_m3u8_formats(
             '%s/api/channel/hls/%s.m3u8?%s'
-            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
             channel_id, 'mp4')
         self._prefer_source(formats)

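The urlencode part of the Twitch change is easier to read outside the diff: compat_urllib_parse.urlencode already returns a text string, so it can be interpolated into the m3u8 URL without a further .encode('utf-8'). A minimal sketch with stand-in values for the usher base URL and channel:

try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2

query = {'player': 'twitchweb', 'segment_preference': '4'}
usher_base = 'http://usher.example.com'  # stand-in for self._USHER_BASE
channel_id = 'somechannel'  # stand-in value
print('%s/api/channel/hls/%s.m3u8?%s' % (usher_base, channel_id, urlencode(query)))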
@@ -41,13 +41,10 @@ class VidmeIE(InfoExtractor):
         duration = float_or_none(self._html_search_regex(
             r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
         view_count = str_to_int(self._html_search_regex(
-            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+            r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
         like_count = str_to_int(self._html_search_regex(
             r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
             webpage, 'like count', fatal=False))
-        comment_count = str_to_int(self._html_search_regex(
-            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
-            webpage, 'comment count', fatal=False))

         return {
             'id': video_id,
@@ -61,5 +58,4 @@ class VidmeIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'like_count': like_count,
-            'comment_count': comment_count,
         }
@@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode):
             raise

         # In case of error, try to remove win32 forbidden chars
-        alt_filename = os.path.join(
-            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-            for path_part in os.path.split(filename)
-        )
+        alt_filename = sanitize_path(filename)
         if alt_filename == filename:
             raise
         else:
             # An exception here should be caught in the caller
-            stream = open(encodeFilename(filename), open_mode)
+            stream = open(encodeFilename(alt_filename), open_mode)
             return (stream, alt_filename)

@@ -311,6 +308,24 @@ def sanitize_filename(s, restricted=False, is_id=False):
     return result


+def sanitize_path(s):
+    """Sanitizes and normalizes path on Windows"""
+    if sys.platform != 'win32':
+        return s
+    drive, _ = os.path.splitdrive(s)
+    unc, _ = os.path.splitunc(s)
+    unc_or_drive = unc or drive
+    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
+    if unc_or_drive:
+        norm_path.pop(0)
+    sanitized_path = [
+        re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
+        for path_part in norm_path]
+    if unc_or_drive:
+        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+    return os.path.join(*sanitized_path)
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
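For a quick feel of the new helper, a few of the assertions from test_sanitize_path can be run directly; the expected outputs below hold on win32, and on any other platform sanitize_path returns its argument unchanged:

from youtube_dl.utils import sanitize_path

print(sanitize_path('abc|def'))     # abc#def    (forbidden character replaced)
print(sanitize_path('C:/abc/def'))  # C:\abc\def (separators normalized, drive kept)
print(sanitize_path('abc/def...'))  # abc\def..# (trailing dot replaced)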