1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-12-22 16:57:40 +00:00

[ssa] Add extractor (Closes #5169)

This commit is contained in:
Sergey M․ 2015-03-11 21:15:36 +06:00
parent 32aaeca775
commit c792b5011f
2 changed files with 59 additions and 0 deletions

View file

@ -460,6 +460,7 @@ from .sport5 import Sport5IE
from .sportbox import SportBoxIE from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .srmediathek import SRMediathekIE from .srmediathek import SRMediathekIE
from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE from .steam import SteamIE
from .streamcloud import StreamcloudIE from .streamcloud import StreamcloudIE

View file

@ -0,0 +1,58 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
parse_duration,
)
class SSAIE(InfoExtractor):
_VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
_TEST = {
'url': 'http://ssa.nls.uk/film/3561',
'info_dict': {
'id': '3561',
'ext': 'flv',
'title': 'SHETLAND WOOL',
'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
'duration': 900,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
streamer = self._search_regex(
r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
play_path = self._search_regex(
r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
def search_field(field_name, fatal=False):
return self._search_regex(
r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
webpage, 'title', fatal=fatal)
title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
description = unescapeHTML(search_field('Description'))
duration = parse_duration(search_field('Running time'))
thumbnail = self._search_regex(
r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
return {
'id': video_id,
'url': streamer,
'play_path': play_path,
'ext': 'flv',
'title': title,
'description': description,
'duration': duration,
'thumbnail': thumbnail,
}