1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-06-02 10:29:28 +00:00

Update for 2024

This commit is contained in:
dirkf 2024-02-11 20:21:19 +00:00 committed by GitHub
parent ccc7329011
commit 320f89ef89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 57 additions and 73 deletions

View file

@ -426,10 +426,7 @@ from .gamespot import GameSpotIE
from .gamestar import GameStarIE from .gamestar import GameStarIE
from .gaskrank import GaskrankIE from .gaskrank import GaskrankIE
from .gazeta import GazetaIE from .gazeta import GazetaIE
from .gbnews import ( from .gbnews import GBNewsIE
GBNewsIE,
GBNewsLiveIE,
)
from .gdcvault import GDCVaultIE from .gdcvault import GDCVaultIE
from .gedidigital import GediDigitalIE from .gedidigital import GediDigitalIE
from .generic import GenericIE from .generic import GenericIE

View file

@ -2,23 +2,24 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..utils import ( from ..utils import (
extract_attributes, extract_attributes,
ExtractorError, ExtractorError,
try_get, T,
traverse_obj,
txt_or_none,
url_or_none,
) )
class GBNewsIE(InfoExtractor): class GBNewsIE(InfoExtractor):
'''GB News clips and features''' IE_DESC = 'GB News clips, features and live stream'
# \w+ is normally shows or news, but apparently any word redirects to the correct URL # \w+ is normally shows or news, but apparently any word redirects to the correct URL
_VALID_URL = r'https?://(?:www\.)?gbnews\.uk/(?:\w+(?:/(?P<display_id>[^/]+))?|a)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
_PLATFORM = 'safari' _PLATFORM = 'safari'
_SSMP_URL = 'https://mm-dev.simplestream.com/ssmp/api.php' _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
_TESTS = [{ _TESTS = [{
'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889', 'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889',
'info_dict': { 'info_dict': {
@ -27,11 +28,32 @@ class GBNewsIE(InfoExtractor):
'title': "Andrew Neil's message to companies choosing to boycott GB News", 'title': "Andrew Neil's message to companies choosing to boycott GB News",
'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351', 'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351',
}, },
}, 'skip': '404 not found',
] }, {
'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
'info_dict': {
'id': '52264136',
'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
'ext': 'mp4',
'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
'description': 'The post was criticised by former employers of the broadcaster',
},
}, {
'url': 'https://www.gbnews.uk/watchlive',
'info_dict': {
'id': '1069',
'display_id': 'watchlive',
'ext': 'mp4',
'title': 'GB News Live',
'is_live': True,
},
'params': {
'skip_download': 'm3u8',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url).split('/')[-1]
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
# extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341 # extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341
@ -50,35 +72,32 @@ class GBNewsIE(InfoExtractor):
''' '''
# exception if no match # exception if no match
video_data = self._search_regex( video_data = self._search_regex(
r'<\s*div\s[^>]*class\s*=\s*([\'"])simplestream\1[^>]*>', r'(<div\s[^>]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)',
webpage, "video data", group=0) webpage, 'video data')
# print(video_data)
video_data = extract_attributes(video_data) video_data = extract_attributes(video_data)
ss_id = try_get(video_data, lambda x: x['data-id']) ss_id = video_data.get('data-id')
if not ss_id: if not ss_id:
raise ExtractorError('Simplestream ID not found') raise ExtractorError('Simplestream ID not found')
# exception if no JSON
json_data = self._download_json( json_data = self._download_json(
self._SSMP_URL, display_id, self._SSMP_URL, display_id,
note='Downloading Simplestream JSON metadata', note='Downloading Simplestream JSON metadata',
errnote='Unable to download Simplestream JSON metadata', errnote='Unable to download Simplestream JSON metadata',
query={ query={
'id': ss_id, 'id': ss_id,
'env': video_data.get('data-env'), 'env': video_data.get('data-env', 'production'),
}) }, fatal=False)
meta_url = try_get(json_data, lambda x: x['response']['api_hostname'], compat_str) meta_url = traverse_obj(json_data, ('response', 'api_hostname'))
if not meta_url: if not meta_url:
raise ExtractorError('No API host found') raise ExtractorError('No API host found')
uvid = video_data.get('data-uvid') uvid = video_data['data-uvid']
dtype = video_data.get('data-type') dtype = video_data.get('data-type')
# exception if no JSON
stream_data = self._download_json( stream_data = self._download_json(
'%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid), '%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid),
display_id, uvid,
query={ query={
'key': video_data.get('data-key'), 'key': video_data.get('data-key'),
'platform': self._PLATFORM, 'platform': self._PLATFORM,
@ -87,66 +106,34 @@ class GBNewsIE(InfoExtractor):
'Token': video_data.get('data-token'), 'Token': video_data.get('data-token'),
'Token-Expiry': video_data.get('data-expiry'), 'Token-Expiry': video_data.get('data-expiry'),
'Uvid': uvid, 'Uvid': uvid,
}) }, fatal=False)
stream_url = try_get(stream_data, lambda x: x['response']['stream'], compat_str) stream_url = traverse_obj(stream_data, (
'response', 'stream', T(url_or_none)))
if not stream_url: if not stream_url:
raise ExtractorError('No stream data') raise ExtractorError('No stream data/URL')
# now known to be a dict # now known to be a dict
stream_data = stream_data['response'] stream_data = stream_data['response']
drm = stream_data.get('drm') drm = stream_data.get('drm')
if drm: if drm:
raise ExtractorError( self.report_drm(uvid)
'Stream is requesting DRM (%s) playback: unsupported' % drm,
expected=True)
formats = []
formats.extend(
self._extract_m3u8_formats(stream_url, display_id, ext='mp4', fatal=False))
formats = self._extract_m3u8_formats(
stream_url, uvid, ext='mp4', entry_protocol='m3u8_native',
fatal=False)
# exception if no formats # exception if no formats
self._sort_formats(formats) self._sort_formats(formats)
# no 'title' attribute seen, but if it comes ...
title = stream_data.get('title') or self._og_search_title(webpage)
return { return {
'id': display_id, 'id': uvid,
'title': title, 'display_id': display_id,
'title': (traverse_obj(stream_data, ('title', T(txt_or_none)))
or self._og_search_title(webpage, default=None)
or display_id.replace('-', ' ').capitalize()),
'description': self._og_search_description(webpage, default=None), 'description': self._og_search_description(webpage, default=None),
'thumbnail': video_data.get('data-poster') or None, 'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none)))
or self._og_search_thumbnail(webpage)),
'formats': formats, 'formats': formats,
'is_live': 'Live' in self.IE_NAME, 'is_live': (dtype == 'live') or None,
} }
class GBNewsLiveIE(GBNewsIE):
    '''GB News live programme stream'''

    # Matches only the "watchlive" page; the literal dots in the host name
    # are escaped so that they cannot match arbitrary characters.
    # The trailing group requires the path segment to end at "watchlive"
    # (end of string, or followed by '/', '?' or '#').
    _VALID_URL = r'https?://(?:www\.)?gbnews\.uk/(?P<id>watchlive)(?:$|[/?#])'

    _TESTS = [{
        'url': 'https://www.gbnews.uk/watchlive',
        'info_dict': {
            'id': 'watchlive',
            'ext': 'mp4',
            'title': "Watchlive",
            'is_live': True,
        },
    },
    ]

    # Sample of the player markup seen on the live page, kept for reference.
    # It is a bare string expression and has no effect at runtime.
    '''
    <div id="video-104872"
    class="simplestream"
    data-id="gb002"
    data-type="live"
    data-key="3Li3Nt2Qs8Ct3Xq9Fi5Uy0Mb2Bj0Qs"
    data-token="d10b3ea37f6ce539ffd1ce2f6ce5fe35"
    data-expiry="1624984755"
    data-uvid="1069"
    data-poster=""
    data-npaw="false"
    data-env="production">
    </div>
    '''