youtube-dl/youtube_dl/extractor/hitbox.py

215 lines
7.2 KiB
Python
Raw Normal View History

2014-12-28 17:07:32 +00:00
# coding: utf-8
from __future__ import unicode_literals
2014-12-29 21:22:07 +00:00
2014-12-29 20:10:59 +00:00
import re
2014-12-28 17:07:32 +00:00
from .common import InfoExtractor
from ..utils import (
2014-12-29 21:22:07 +00:00
clean_html,
parse_iso8601,
float_or_none,
int_or_none,
compat_str,
determine_ext,
2014-12-28 17:07:32 +00:00
)
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos hosted on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
        'only_matching': True,
    }]

    def _extract_metadata(self, url, video_id):
        """Download the media metadata JSON and map it to an info dict.

        ``url`` is the API endpoint prefix; the JSON is fetched from
        ``<url>/<video_id>``. The returned dict carries every field except
        ``formats``, which the caller is expected to add.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')

        # Recorded videos and livestreams live under different top-level
        # keys and use different date fields; livestream is the default.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        # NOTE(review): an absent or empty list still raises IndexError here;
        # left unchanged so callers observe the same failure as before.
        video_meta = metadata.get(media_type, [])[0]

        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description')
            or video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Only report a category when the API actually supplies one,
        # instead of emitting a list holding a single None.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Thumbnail paths are relative to thumb_base. Skip variants the API
        # omits rather than failing on ``str + None``.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a recorded video."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            # Entries labelled 'Auto' are skipped; only explicit variants
            # are listed as formats.
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            bitrate = int_or_none(video.get('bitrate'))
            if determine_ext(video_url) == 'm3u8':
                # Playlist URLs that are not absolute http(s) are dropped.
                if not video_url.startswith('http'):
                    continue
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'tbr': bitrate,
                    'format_note': label,
                    'protocol': 'm3u8_native',
                })
            else:
                formats.append({
                    'url': video_url,
                    'tbr': bitrate,
                    'format_note': label,
                })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/video', video_id)
        metadata['formats'] = formats

        return metadata
class HitboxLiveIE(HitboxIE):
    """Extractor for live channels on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/dimak',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match channel URLs, deferring to HitboxIE for anything it accepts."""
        # The channel pattern is broad enough to also match video URLs, so
        # URLs that HitboxIE claims are explicitly rejected here.
        return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a live channel."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        # A missing 'cdns' entry is treated as "no CDNs" instead of failing
        # on iteration over None.
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue

            base_url = cdn.get('netConnectionUrl')
            # The last two dot-separated labels before the first path
            # separator are used as a note on RTMP formats. The note is left
            # as None when the URL is absent or has an unexpected shape.
            host = None
            if base_url:
                host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
                if host_match:
                    host = host_match.group(1)

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # Playlist URLs that are not absolute http(s) are dropped.
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                else:
                    # These stream URLs are joined onto the CDN connection
                    # URL; without one there is nothing to join onto.
                    if not base_url:
                        continue
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/live', video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))

        return metadata