1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-16 06:15:46 +00:00
youtube-dl/youtube_dl/extractor/espn.py
Jaime Marquínez Ferrándiz 69f8595256 [espn] Extract better titles
2015-12-31 20:06:21 +01:00

78 lines
2.7 KiB
Python

from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import remove_end
class ESPNIE(InfoExtractor):
    """Extractor for pages under espn.go.com that embed a video play button.

    The extractor does not resolve formats itself: it builds an
    ``ooyalaexternal:`` URL and hands it to the ``OoyalaExternal`` extractor
    through a ``url_transparent`` result, overriding only the title.
    """

    # The last path segment (anything after the final '/') is captured as the
    # id; it is only used to label the initial page download.
    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
        'info_dict': {
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
            'ext': 'mp4',
            'title': '30 for 30 Shorts: Judging Jewell',
            'description': None,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
        'url': 'http://espn.go.com/video/clip?id=2743663',
        'info_dict': {
            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
            'ext': 'mp4',
            'title': 'Must-See Moments: Best of the MLS season',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/recap?gameId=400793786',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result pointing at OoyalaExternal.

        Steps, in order:
          1. Download the page at ``url``.
          2. Read the numeric video id from the ``video-play-button`` element.
          3. Choose the cms: ``intl`` when the page contains
             ``data-source="intl"``, otherwise ``espn``.
          4. Download the twitter iframe player page for that id/cms and read
             the ``pcode`` value from it.
          5. Take the page's OpenGraph title with a trailing
             ``- ESPN Video`` removed and surrounding whitespace stripped.
        """
        # Id taken from the URL; replaced below by the id found in the markup.
        url_id = self._match_id(url)
        page = self._download_webpage(url, url_id)

        video_id = self._search_regex(
            r'class="video-play-button"[^>]+data-id="(\d+)',
            page, 'video id')

        cms = 'intl' if 'data-source="intl"' in page else 'espn'

        iframe_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
        iframe_page = self._download_webpage(iframe_url, video_id)

        pcode = self._search_regex(
            r'["\']pcode=([^"\']+)["\']', iframe_page, 'pcode')

        og_title = self._og_search_title(page)
        title = remove_end(og_title, '- ESPN Video').strip()

        return {
            '_type': 'url_transparent',
            'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
            'ie_key': 'OoyalaExternal',
            'title': title,
        }