1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-06-02 10:29:28 +00:00
youtube-dl/youtube_dl/extractor/chelseafc.py

84 lines
3 KiB
Python
Raw Normal View History

2023-03-23 12:00:44 +00:00
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
traverse_obj,
unified_timestamp,
url_or_none,
)
2023-03-23 12:00:44 +00:00
class ChelseafcIE(InfoExtractor):
    """Information extractor for video pages on chelseafc.com.

    The video metadata is read from a JSON blob stored in the ``data-props``
    attribute of the ``<div>`` whose ``data-component`` is ``VideoDetails``.
    """

    _VALID_URL = r'https?://(?:www\.)?chelseafc\.com(?:/[a-z]+)?/video/(?P<id>[a-z0-9]+(?:-[a-z0-9]+)*)'
    _TESTS = [{
        'url': 'https://www.chelseafc.com/en/video/full-match-chelsea-2-2-everton',
        'md5': '2fda617911b7148a2a19bec55b75d30a',
        'info_dict': {
            'id': 'full-match-chelsea-2-2-everton',
            'ext': 'mp4',
            'title': 'Full Match: Chelsea 2-2 Everton',
            'description': 'Full match highlights from Chelsea\'s 2-2 Premier League draw with Everton at Stamford Bridge.',
            'duration': 2842.0,
            'timestamp': 1679184000,
            'tags': ['Premier League', 'Everton', 'Video and article choice'],
            'upload_date': '20230319',
            'thumbnail': r're:https?://.*\.png',
        },
    }, {
        'url': 'https://www.chelseafc.com/en/video/manchester-city-vs-chelsea-2-0-or-highlights-or-efl-cup',
        'md5': '2905365c3c9cf4612f303fbb99c2f4ca',
        'info_dict': {
            'id': 'manchester-city-vs-chelsea-2-0-or-highlights-or-efl-cup',
            'ext': 'mp4',
            'title': 'Manchester City 2-0 Chelsea | Highlights | EFL Cup',
            'description': 'Highlights from our EFL Cup match against Man City.',
            'duration': 120.0,
            'timestamp': 1668042000,
            'upload_date': '20221110',
            'tags': ['Highlights', 'League Cup', 'Manchester City'],
            'thumbnail': r're:https?://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded, the ``VideoDetails`` component's opening
        ``<div>`` tag is located, and its ``data-props`` attribute is parsed
        as JSON. ``videoDetail.signedUrl`` is used as the m3u8 manifest and
        ``videoDetail.title`` as the title; both are accessed as mandatory
        keys. All remaining fields are optional and fall back to ``None``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # First isolate the opening tag of the VideoDetails component so the
        # data-props search below cannot pick up another component's props.
        video_details_div = self._search_regex(
            r'(<div\s[^>]*\bdata-component\s*=\s*(?:"|\')\s*VideoDetails\s*(?:"|\')[^>]*>)',
            webpage, 'div')
        # NOTE(review): the value pattern stops at the first quote of either
        # kind, so it presumably relies on quotes inside the JSON being
        # HTML-escaped in the attribute -- confirm against the live markup.
        raw_data = self._html_search_regex(
            r'<div[^>]*\sdata-props\s*=\s*(?:"|\')\s*([^"\']*)\s*(?:"|\')[^>]*>',
            video_details_div, 'data')

        data = self._parse_json(raw_data, video_id)
        manifest_url = data['videoDetail']['signedUrl']
        data = data['videoDetail']
        title = data['title']

        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')
        self._sort_formats(formats)

        def txt_or_none(x):
            """Return *x* stripped, or None if it is empty or not a string."""
            try:
                return x.strip() or None
            except AttributeError:
                # An optional field that is missing or null (e.g. no
                # 'description' key) must not abort the whole extraction.
                return None

        return {
            'id': video_id,
            'title': title,
            'description': txt_or_none(data.get('description')),
            'formats': formats,
            'duration': parse_duration(data.get('duration')),
            'timestamp': unified_timestamp(data.get('releaseDate')),
            'tags': traverse_obj(data, ('tags', Ellipsis, 'title'), expected_type=txt_or_none),
            'thumbnail': traverse_obj(data, ('image', 'file', 'url'), expected_type=url_or_none),
        }