mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 11:41:52 +00:00
[brightcove] Relax video tag embeds extraction
BrightcoveNewIE._extract_urls(): Handle player data when it is all in attributes of the <video> tag, and also when the id is given as data-brightcove-video-id rather than data-video-id. Add tests to the generic extractor; note that HEAD gives 404 and no checksum is computed(?).
This commit is contained in:
parent
3dfceb286c
commit
40158f55c9
2 changed files with 92 additions and 20 deletions
|
@ -17,6 +17,7 @@ from ..compat import (
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
extract_attributes,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
@ -109,6 +110,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
'upload_date': '20140827',
|
'upload_date': '20140827',
|
||||||
'uploader_id': '710858724001',
|
'uploader_id': '710858724001',
|
||||||
},
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# playlist with 'videoList'
|
# playlist with 'videoList'
|
||||||
|
@ -490,9 +492,10 @@ class BrightcoveNewIE(InfoExtractor):
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
# Reference:
|
# Reference:
|
||||||
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
|
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
|
||||||
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
|
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
|
||||||
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
|
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
|
||||||
# 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
|
# 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
|
||||||
|
# 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
|
@ -501,22 +504,39 @@ class BrightcoveNewIE(InfoExtractor):
|
||||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||||
|
|
||||||
# Look for embed_in_page embeds [2]
|
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||||
for video_id, account_id, player_id, embed in re.findall(
|
# [2] looks like:
|
||||||
# According to examples from [3] it's unclear whether video id
|
# <video data-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
|
||||||
# may be optional and what to do when it is
|
|
||||||
# According to [4] data-video-id may be prefixed with ref:
|
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||||
r'''(?sx)
|
r'''(?isx)
|
||||||
<video[^>]+
|
(<video[^>]+>)
|
||||||
data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
|
(?:.*?
|
||||||
</video>.*?
|
(<script[^>]+
|
||||||
<script[^>]+
|
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
)
|
||||||
''', webpage):
|
)?
|
||||||
entries.append(
|
''', webpage
|
||||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
):
|
||||||
% (account_id, player_id, embed, video_id))
|
attrs = extract_attributes(video)
|
||||||
|
|
||||||
|
# According to examples from [4] it's unclear whether video id
|
||||||
|
# may be optional and what to do when it is
|
||||||
|
video_id = attrs.get('data-video-id')
|
||||||
|
# See PR#12099/bostonglobe.py for 'data-brightcove-video-id' variant
|
||||||
|
|
||||||
|
if not account_id:
|
||||||
|
account_id = attrs.get('data-account')
|
||||||
|
if not player_id:
|
||||||
|
player_id = attrs.get('data-player')
|
||||||
|
if not embed:
|
||||||
|
embed = attrs.get('data-embed')
|
||||||
|
|
||||||
|
if video_id and account_id and player_id and embed:
|
||||||
|
entries.append(
|
||||||
|
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||||
|
% (account_id, player_id, embed, video_id))
|
||||||
|
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
|
@ -449,6 +449,59 @@ class GenericIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Brightcove with UUID in videoPlayer
|
||||||
|
'url': 'http://www8.hp.com/cn/zh/home.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5255815316001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sprocket Video - China',
|
||||||
|
'description': 'Sprocket Video - China',
|
||||||
|
'uploader': 'HP-Video Gallery',
|
||||||
|
'timestamp': 1482263210,
|
||||||
|
'upload_date': '20161220',
|
||||||
|
'uploader_id': '1107601872001',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # m3u8 download
|
||||||
|
},
|
||||||
|
'skip': 'video rotates...weekly?',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Brightcove:new type [2].
|
||||||
|
'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
|
||||||
|
'md5': '2b35148fcf48da41c9fb4591650784f3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5348741021001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20170306',
|
||||||
|
'uploader_id': '4191638492001',
|
||||||
|
'timestamp': 1488769918,
|
||||||
|
'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
|
||||||
|
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Alternative brightcove <video> attributes
|
||||||
|
'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
|
||||||
|
'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': '732d22ba3d33f2f3fc253c39f8f36523',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5311302538001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
|
||||||
|
'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
|
||||||
|
'timestamp': 1486321708,
|
||||||
|
'upload_date': '20170205',
|
||||||
|
'uploader_id': '800000640001',
|
||||||
|
},
|
||||||
|
'only_matching': True,
|
||||||
|
}],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# Brightcove with UUID in videoPlayer
|
# Brightcove with UUID in videoPlayer
|
||||||
'url': 'http://www8.hp.com/cn/zh/home.html',
|
'url': 'http://www8.hp.com/cn/zh/home.html',
|
||||||
|
@ -1900,7 +1953,6 @@ class GenericIE(InfoExtractor):
|
||||||
# Look for Brightcove Legacy Studio embeds
|
# Look for Brightcove Legacy Studio embeds
|
||||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||||
if bc_urls:
|
if bc_urls:
|
||||||
self.to_screen('Brightcove video detected.')
|
|
||||||
entries = [{
|
entries = [{
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': smuggle_url(bc_url, {'Referer': url}),
|
'url': smuggle_url(bc_url, {'Referer': url}),
|
||||||
|
|
Loading…
Reference in a new issue