1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-25 11:41:52 +00:00

Merge remote-tracking branch 'upstream/master' into cspanlive

This commit is contained in:
Clay Freeman 2020-01-16 19:08:29 -06:00
commit be9300a99f
No known key found for this signature in database
GPG key ID: E8023472A3663FDC
55 changed files with 1550 additions and 1139 deletions

View file

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.15. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've verified that I'm running youtube-dl version **2020.01.15**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.11.28 [debug] youtube-dl version 2020.01.15
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View file

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.15. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've verified that I'm running youtube-dl version **2020.01.15**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View file

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.15. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've verified that I'm running youtube-dl version **2020.01.15**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View file

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.15. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've verified that I'm running youtube-dl version **2020.01.15**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.11.28 [debug] youtube-dl version 2020.01.15
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View file

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.15. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've verified that I'm running youtube-dl version **2020.01.15**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View file

@ -13,7 +13,7 @@ dist: trusty
env: env:
- YTDL_TEST_SET=core - YTDL_TEST_SET=core
- YTDL_TEST_SET=download - YTDL_TEST_SET=download
matrix: jobs:
include: include:
- python: 3.7 - python: 3.7
dist: xenial dist: xenial
@ -35,6 +35,11 @@ matrix:
env: YTDL_TEST_SET=download env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core - env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=download
- name: flake8
python: 3.8
dist: xenial
install: pip install flake8
script: flake8 .
fast_finish: true fast_finish: true
allow_failures: allow_failures:
- env: YTDL_TEST_SET=download - env: YTDL_TEST_SET=download

110
ChangeLog
View file

@ -1,3 +1,109 @@
version 2020.01.15
Extractors
* [yourporn] Fix extraction (#21645, #22255, #23459)
+ [canvas] Add support for new API endpoint (#17680, #18629)
* [ndr:base:embed] Improve thumbnails extraction (#23731)
+ [vodplatform] Add support for embed.kwikmotion.com domain
+ [twitter] Add support for promo_video_website cards (#23711)
* [orf:radio] Clean description and improve extraction
* [orf:fm4] Fix extraction (#23599)
* [safari] Fix kaltura session extraction (#23679, #23670)
* [lego] Fix extraction and extract subtitle (#23687)
* [cloudflarestream] Improve extraction
+ Add support for bytehighway.net domain
+ Add support for signed URLs
+ Extract thumbnail
* [naver] Improve extraction
* Improve geo-restriction handling
+ Extract automatic captions
+ Extract uploader metadata
+ Extract VLive HLS formats
* Improve metadata extraction
- [pandatv] Remove extractor (#23630)
* [dctp] Fix format extraction (#23656)
+ [scrippsnetworks] Add support for www.discovery.com videos
* [discovery] Fix anonymous token extraction (#23650)
* [nrktv:seriebase] Fix extraction (#23625, #23537)
* [wistia] Improve format extraction and extract subtitles (#22590)
* [vice] Improve extraction (#23631)
* [redtube] Detect private videos (#23518)
version 2020.01.01
Extractors
* [brightcove] Invalidate policy key cache on failing requests
* [pornhub] Improve locked videos detection (#22449, #22780)
+ [pornhub] Add support for m3u8 formats
* [pornhub] Fix extraction (#22749, #23082)
* [brightcove] Update policy key on failing requests
* [spankbang] Improve removed video detection (#23423)
* [spankbang] Fix extraction (#23307, #23423, #23444)
* [soundcloud] Automatically update client id on failing requests
* [prosiebensat1] Improve geo restriction handling (#23571)
* [brightcove] Cache brightcove player policy keys
* [teachable] Fail with error message if no video URL found
* [teachable] Improve locked lessons detection (#23528)
+ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981)
* [mitele] Fix extraction (#21354, #23456)
* [soundcloud] Update client id (#23516)
* [mailru] Relax URL regular expressions (#23509)
version 2019.12.25
Core
* [utils] Improve str_to_int
+ [downloader/hls] Add ability to override AES decryption key URL (#17521)
Extractors
* [mediaset] Fix parse formats (#23508)
+ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291)
+ [slideslive] Add support for url and vimeo service names (#23414)
* [slideslive] Fix extraction (#23413)
* [twitch:clips] Fix extraction (#23375)
+ [soundcloud] Add support for token protected embeds (#18954)
* [vk] Improve extraction
* Fix User Videos extraction (#23356)
* Extract all videos for lists with more than 1000 videos (#23356)
+ Add support for video albums (#14327, #14492)
- [kontrtube] Remove extractor
- [videopremium] Remove extractor
- [musicplayon] Remove extractor (#9225)
+ [ufctv] Add support for ufcfightpass.imgdge.com and
ufcfightpass.imggaming.com (#23343)
+ [twitch] Extract m3u8 formats frame rate (#23333)
+ [imggaming] Add support for playlists and extract subtitles
+ [ufcarabia] Add support for UFC Arabia (#23312)
* [ufctv] Fix extraction
* [yahoo] Fix gyao brightcove player id (#23303)
* [vzaar] Override AES decryption key URL (#17521)
+ [vzaar] Add support for AES HLS manifests (#17521, #23299)
* [nrl] Fix extraction
* [teachingchannel] Fix extraction
* [nintendo] Fix extraction and partially add support for Nintendo Direct
videos (#4592)
+ [ooyala] Add better fallback values for domain and streams variables
+ [youtube] Add support for youtubekids.com (#23272)
* [tv2] Detect DRM protection
+ [tv2] Add support for katsomo.fi and mtv.fi (#10543)
* [tv2] Fix tv2.no article extraction
* [msn] Improve extraction
+ Add support for YouTube and NBCSports embeds
+ Add support for articles with multiple videos
* Improve AOL embed support
* Improve format extraction
* [abcotvs] Relax URL regular expression and improve metadata extraction
(#18014)
* [channel9] Reduce response size
* [adobetv] Improve extraction
* Use OnDemandPagedList for list extractors
* Reduce show extraction requests
* Extract original video format and subtitles
+ Add support for adobe tv embeds
version 2019.11.28 version 2019.11.28
Core Core
@ -583,7 +689,7 @@ Extractors
version 2019.04.17 version 2019.04.17
Extractors Extractors
* [openload] Randomize User-Agent (closes #20688) * [openload] Randomize User-Agent (#20688)
+ [openload] Add support for oladblock domains (#20471) + [openload] Add support for oladblock domains (#20471)
* [adn] Fix subtitle extraction (#12724) * [adn] Fix subtitle extraction (#12724)
+ [aol] Add support for localized websites + [aol] Add support for localized websites
@ -1148,7 +1254,7 @@ Extractors
+ [youtube] Extract channel meta fields (#9676, #12939) + [youtube] Extract channel meta fields (#9676, #12939)
* [porntube] Fix extraction (#17541) * [porntube] Fix extraction (#17541)
* [asiancrush] Fix extraction (#15630) * [asiancrush] Fix extraction (#15630)
+ [twitch:clips] Extend URL regular expression (closes #17559) + [twitch:clips] Extend URL regular expression (#17559)
+ [vzaar] Add support for HLS + [vzaar] Add support for HLS
* [tube8] Fix metadata extraction (#17520) * [tube8] Fix metadata extraction (#17520)
* [eporner] Extract JSON-LD (#17519) * [eporner] Extract JSON-LD (#17519)

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import io import io
import json import json
import mimetypes import mimetypes
@ -15,7 +14,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import ( from youtube_dl.compat import (
compat_basestring, compat_basestring,
compat_input,
compat_getpass, compat_getpass,
compat_print, compat_print,
compat_urllib_request, compat_urllib_request,
@ -40,28 +38,20 @@ class GitHubReleaser(object):
try: try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE) info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None: if info is not None:
self._username = info[0] self._token = info[2]
self._password = info[2]
compat_print('Using GitHub credentials found in .netrc...') compat_print('Using GitHub credentials found in .netrc...')
return return
else: else:
compat_print('No GitHub credentials found in .netrc') compat_print('No GitHub credentials found in .netrc')
except (IOError, netrc.NetrcParseError): except (IOError, netrc.NetrcParseError):
compat_print('Unable to parse .netrc') compat_print('Unable to parse .netrc')
self._username = compat_input( self._token = compat_getpass(
'Type your GitHub username or email address and press [Return]: ') 'Type your GitHub PAT (personal access token) and press [Return]: ')
self._password = compat_getpass(
'Type your GitHub password and press [Return]: ')
def _call(self, req): def _call(self, req):
if isinstance(req, compat_basestring): if isinstance(req, compat_basestring):
req = sanitized_Request(req) req = sanitized_Request(req)
# Authorizing manually since GitHub does not response with 401 with req.add_header('Authorization', 'token %s' % self._token)
# WWW-Authenticate header set (see
# https://developer.github.com/v3/#basic-authentication)
b64 = base64.b64encode(
('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
req.add_header('Authorization', 'Basic %s' % b64)
response = self._opener.open(req).read().decode('utf-8') response = self._opener.open(req).read().decode('utf-8')
return json.loads(response) return json.loads(response)

View file

@ -28,10 +28,11 @@
- **acast:channel** - **acast:channel**
- **ADN**: Anime Digital Network - **ADN**: Anime Digital Network
- **AdobeConnect** - **AdobeConnect**
- **AdobeTV** - **adobetv**
- **AdobeTVChannel** - **adobetv:channel**
- **AdobeTVShow** - **adobetv:embed**
- **AdobeTVVideo** - **adobetv:show**
- **adobetv:video**
- **AdultSwim** - **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
- **afreecatv**: afreecatv.com - **afreecatv**: afreecatv.com
@ -396,6 +397,7 @@
- **Kankan** - **Kankan**
- **Karaoketv** - **Karaoketv**
- **KarriereVideos** - **KarriereVideos**
- **Katsomo**
- **KeezMovies** - **KeezMovies**
- **Ketnet** - **Ketnet**
- **KhanAcademy** - **KhanAcademy**
@ -403,7 +405,6 @@
- **KinjaEmbed** - **KinjaEmbed**
- **KinoPoisk** - **KinoPoisk**
- **KonserthusetPlay** - **KonserthusetPlay**
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью - **KrasView**: Красвью
- **Ku6** - **Ku6**
- **KUSI** - **KUSI**
@ -513,7 +514,6 @@
- **mtvjapan** - **mtvjapan**
- **mtvservices:embedded** - **mtvservices:embedded**
- **MuenchenTV**: münchen.tv - **MuenchenTV**: münchen.tv
- **MusicPlayOn**
- **mva**: Microsoft Virtual Academy videos - **mva**: Microsoft Virtual Academy videos
- **mva:course**: Microsoft Virtual Academy courses - **mva:course**: Microsoft Virtual Academy courses
- **Mwave** - **Mwave**
@ -628,7 +628,6 @@
- **OutsideTV** - **OutsideTV**
- **PacktPub** - **PacktPub**
- **PacktPubCourse** - **PacktPubCourse**
- **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV - **pandora.tv**: 판도라TV
- **ParamountNetwork** - **ParamountNetwork**
- **parliamentlive.tv**: UK parliament videos - **parliamentlive.tv**: UK parliament videos
@ -761,6 +760,7 @@
- **screen.yahoo:search**: Yahoo screen search - **screen.yahoo:search**: Yahoo screen search
- **Screencast** - **Screencast**
- **ScreencastOMatic** - **ScreencastOMatic**
- **ScrippsNetworks**
- **scrippsnetworks:watch** - **scrippsnetworks:watch**
- **SCTE** - **SCTE**
- **SCTECourse** - **SCTECourse**
@ -913,6 +913,7 @@
- **tv2.hu** - **tv2.hu**
- **TV2Article** - **TV2Article**
- **TV2DK** - **TV2DK**
- **TV2DKBornholmPlay**
- **TV4**: tv4.se and tv4play.se - **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+ - **TV5MondePlus**: TV5MONDE+
- **TVA** - **TVA**
@ -954,6 +955,7 @@
- **udemy** - **udemy**
- **udemy:course** - **udemy:course**
- **UDNEmbed**: 聯合影音 - **UDNEmbed**: 聯合影音
- **UFCArabia**
- **UFCTV** - **UFCTV**
- **UKTVPlay** - **UKTVPlay**
- **umg:de**: Universal Music Deutschland - **umg:de**: Universal Music Deutschland
@ -993,7 +995,6 @@
- **videomore** - **videomore**
- **videomore:season** - **videomore:season**
- **videomore:video** - **videomore:video**
- **VideoPremium**
- **VideoPress** - **VideoPress**
- **Vidio** - **Vidio**
- **VidLii** - **VidLii**

View file

@ -500,6 +500,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int('123.456'), 123456)
self.assertEqual(str_to_int(523), 523) self.assertEqual(str_to_int(523), 523)
# Python 3 has no long
if sys.version_info < (3, 0):
eval('self.assertEqual(str_to_int(123456L), 123456)')
self.assertEqual(str_to_int('noninteger'), None)
self.assertEqual(str_to_int([]), None)
def test_url_basename(self): def test_url_basename(self):
self.assertEqual(url_basename('http://foo.de/'), '') self.assertEqual(url_basename('http://foo.de/'), '')

View file

@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none, int_or_none,
js_to_json,
try_get, try_get,
unified_strdate, unified_strdate,
) )
@ -13,22 +14,21 @@ from ..utils import (
class AmericasTestKitchenIE(InfoExtractor): class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f', 'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': { 'info_dict': {
'id': '1_5g5zua6e', 'id': '5b400b9ee338f922cb06450c',
'title': 'Summer Dinner Party', 'title': 'Weeknight Japanese Suppers',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec', 'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://',
'timestamp': 1497285541, 'timestamp': 1523664000,
'upload_date': '20170612', 'upload_date': '20180414',
'uploader_id': 'roger.metcalf@americastestkitchen.com', 'release_date': '20180414',
'release_date': '20170617',
'series': "America's Test Kitchen", 'series': "America's Test Kitchen",
'season_number': 17, 'season_number': 18,
'episode': 'Summer Dinner Party', 'episode': 'Weeknight Japanese Suppers',
'episode_number': 24, 'episode_number': 15,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -47,7 +47,7 @@ class AmericasTestKitchenIE(InfoExtractor):
self._search_regex( self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>', r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
webpage, 'initial context'), webpage, 'initial context'),
video_id) video_id, js_to_json)
ep_data = try_get( ep_data = try_get(
video_data, video_data,
@ -55,17 +55,7 @@ class AmericasTestKitchenIE(InfoExtractor):
lambda x: x['videoDetail']['content']['data']), dict) lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {}) ep_meta = ep_data.get('full_video', {})
zype_id = ep_meta.get('zype_id') zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
if zype_id:
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
ie_key = 'Zype'
else:
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id')
external_id = ep_data.get('external_id') or ep_meta['external_id']
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
ie_key = 'Kaltura'
title = ep_data.get('title') or ep_meta.get('title') title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get( description = clean_html(ep_meta.get('episode_description') or ep_data.get(
@ -79,8 +69,8 @@ class AmericasTestKitchenIE(InfoExtractor):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': embed_url, 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
'ie_key': ie_key, 'ie_key': 'Zype',
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,

View file

@ -599,45 +599,63 @@ class BrightcoveNewIE(AdobePassIE):
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage( policy_key_id = '%s_%s' % (account_id, player_id)
'http://players.brightcove.net/%s/%s_%s/index.min.js' policy_key = self._downloader.cache.load('brightcove', policy_key_id)
% (account_id, player_id, embed), video_id) policy_key_extracted = False
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
policy_key = None def extract_policy_key():
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
catalog = self._search_regex( policy_key = None
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog: catalog = self._search_regex(
catalog = self._parse_json( r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
js_to_json(catalog), video_id, fatal=False)
if catalog: if catalog:
policy_key = catalog.get('policyKey') catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key: if not policy_key:
policy_key = self._search_regex( policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk') webpage, 'policy key', group='pk')
store_pk(policy_key)
return policy_key
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
headers = { headers = {}
'Accept': 'application/json;pk=%s' % policy_key,
}
referrer = smuggled_data.get('referrer') referrer = smuggled_data.get('referrer')
if referrer: if referrer:
headers.update({ headers.update({
'Referer': referrer, 'Referer': referrer,
'Origin': re.search(r'https?://[^/]+', referrer).group(0), 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
}) })
try:
json_data = self._download_json(api_url, video_id, headers=headers) for _ in range(2):
except ExtractorError as e: if not policy_key:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: policy_key = extract_policy_key()
json_data = self._parse_json(e.cause.read().decode(), video_id)[0] policy_key_extracted = True
message = json_data.get('message') or json_data['error_code'] headers['Accept'] = 'application/json;pk=%s' % policy_key
if json_data.get('error_subcode') == 'CLIENT_GEO': try:
self.raise_geo_restricted(msg=message) json_data = self._download_json(api_url, video_id, headers=headers)
raise ExtractorError(message, expected=True) break
raise except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)
elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
policy_key = None
store_pk(None)
continue
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors') errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH': if errors and errors[0].get('error_subcode') == 'TVE_AUTH':

View file

@ -13,6 +13,8 @@ from ..utils import (
int_or_none, int_or_none,
merge_dicts, merge_dicts,
parse_iso8601, parse_iso8601,
str_or_none,
url_or_none,
) )
@ -20,15 +22,15 @@ class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e', 'md5': '68993eda72ef62386a15ea2cf3c93107',
'info_dict': { 'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'flv', 'ext': 'mp4',
'title': 'Nachtwacht: De Greystook', 'title': 'Nachtwacht: De Greystook',
'description': 'md5:1db3f5dc4c7109c821261e7512975be7', 'description': 'Nachtwacht: De Greystook',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.03, 'duration': 1468.04,
}, },
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, { }, {
@ -39,23 +41,45 @@ class CanvasIE(InfoExtractor):
'HLS': 'm3u8_native', 'HLS': 'm3u8_native',
'HLS_AES': 'm3u8', 'HLS_AES': 'm3u8',
} }
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site_id'), mobj.group('id') site_id, video_id = mobj.group('site_id'), mobj.group('id')
# Old API endpoint, serves more formats but may fail for some videos
data = self._download_json( data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s' 'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), video_id) % (site_id, video_id), video_id, 'Downloading asset JSON',
'Unable to download asset JSON', fatal=False)
# New API endpoint
if not data:
token = self._download_json(
'%s/tokens' % self._REST_API_BASE, video_id,
'Downloading token', data=b'',
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
data = self._download_json(
'%s/videos/%s' % (self._REST_API_BASE, video_id),
video_id, 'Downloading video JSON', fatal=False, query={
'vrtPlayerToken': token,
'client': '%s@PROD' % site_id,
}, expected_status=400)
message = data.get('message')
if message and not data.get('title'):
if data.get('code') == 'AUTHENTICATION_REQUIRED':
self.raise_login_required(message)
raise ExtractorError(message, expected=True)
title = data['title'] title = data['title']
description = data.get('description') description = data.get('description')
formats = [] formats = []
for target in data['targetUrls']: for target in data['targetUrls']:
format_url, format_type = target.get('url'), target.get('type') format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
if not format_url or not format_type: if not format_url or not format_type:
continue continue
format_type = format_type.upper()
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP: if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type], format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
@ -134,20 +158,20 @@ class CanvasEenIE(InfoExtractor):
}, },
'skip': 'Pagina niet gevonden', 'skip': 'Pagina niet gevonden',
}, { }, {
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', 'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
'info_dict': { 'info_dict': {
'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f', 'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
'display_id': 'herbekijk-sorry-voor-alles', 'display_id': 'emma-pakt-thilly-aan',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Herbekijk Sorry voor alles', 'title': 'Emma pakt Thilly aan',
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', 'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3788.06, 'duration': 118.24,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Episode no longer available', 'expected_warnings': ['is not a supported codec'],
}, { }, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True, 'only_matching': True,
@ -183,19 +207,44 @@ class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be' IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
# Available via old API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
'info_dict': { 'info_dict': {
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'flv', 'ext': 'mp4',
'title': 'De zwarte weduwe', 'title': 'De zwarte weduwe',
'description': 'md5:d90c21dced7db869a85db89a623998d4', 'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
'duration': 1457.04, 'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'season': '1', 'season': 'Season 1',
'season_number': 1, 'season_number': 1,
'episode_number': 1, 'episode_number': 1,
}, },
'skip': 'This video is only available for registered users' 'skip': 'This video is only available for registered users',
'params': {
'username': '<snip>',
'password': '<snip>',
},
'expected_warnings': ['is not a supported codec'],
}, {
# Only available via new API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
'info_dict': {
'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
'ext': 'mp4',
'title': 'Aflevering 5',
'description': 'Wie valt door de mand tijdens een missie?',
'duration': 2967.06,
'season': 'Season 1',
'season_number': 1,
'episode_number': 5,
},
'skip': 'This video is only available for registered users',
'params': {
'username': '<snip>',
'password': '<snip>',
},
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
}] }]
_NETRC_MACHINE = 'vrtnu' _NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'

View file

@ -1,20 +1,24 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor): class CloudflareStreamIE(InfoExtractor):
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/| (?:watch\.)?%s/|
embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo= %s
) )
(?P<id>[\da-f]+) (?P<id>%s)
''' ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
_TESTS = [{ _TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': { 'info_dict': {
@ -41,23 +45,28 @@ class CloudflareStreamIE(InfoExtractor):
return [ return [
mobj.group('url') mobj.group('url')
for mobj in re.finditer( for mobj in re.finditer(
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1', r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
base_url = 'https://%s/%s/' % (domain, video_id)
if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode(
video_id.split('.')[1]), video_id)['sub']
manifest_base_url = base_url + 'manifest/video.'
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id, manifest_base_url + 'm3u8', video_id, 'mp4',
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', 'm3u8_native', m3u8_id='hls', fatal=False)
fatal=False)
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id, manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': video_id, 'title': video_id,
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
'formats': formats, 'formats': formats,
} }

View file

@ -16,10 +16,11 @@ class DctpTvIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# 4x3 # 4x3
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'md5': '3ffbd1556c3fe210724d7088fad723e3',
'info_dict': { 'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c', 'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'flv', 'ext': 'm4v',
'title': 'Videoinstallation für eine Kaufhausfassade', 'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm', 'description': 'Kurzfilm',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
@ -27,10 +28,6 @@ class DctpTvIE(InfoExtractor):
'timestamp': 1302172322, 'timestamp': 1302172322,
'upload_date': '20110407', 'upload_date': '20110407',
}, },
'params': {
# rtmp download
'skip_download': True,
},
}, { }, {
# 16x9 # 16x9
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/', 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
@ -59,33 +56,26 @@ class DctpTvIE(InfoExtractor):
uuid = media['uuid'] uuid = media['uuid']
title = media['title'] title = media['title']
ratio = '16x9' if media.get('is_wide') else '4x3' is_wide = media.get('is_wide')
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio) formats = []
servers = self._download_json( def add_formats(suffix):
'http://www.dctp.tv/streaming_servers/', display_id, templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
note='Downloading server list JSON', fatal=False) formats.extend([{
'format_id': 'hls-' + suffix,
'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
'protocol': 'm3u8_native',
}, {
'format_id': 's3-' + suffix,
'url': templ % 'completed-media.s3.amazonaws.com',
}, {
'format_id': 'http-' + suffix,
'url': templ % 'cdn-media.dctp.tv',
}])
if servers: add_formats('0500_' + ('16x9' if is_wide else '4x3'))
endpoint = next( if is_wide:
server['endpoint'] add_formats('720p')
for server in servers
if url_or_none(server.get('endpoint'))
and 'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
app = self._search_regex(
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
formats = [{
'url': endpoint,
'app': app,
'play_path': play_path,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
'ext': 'flv',
}]
thumbnails = [] thumbnails = []
images = media.get('images') images = media.get('images')

View file

@ -13,8 +13,8 @@ from ..compat import compat_HTTPError
class DiscoveryIE(DiscoveryGoBaseIE): class DiscoveryIE(DiscoveryGoBaseIE):
_VALID_URL = r'''(?x)https?:// _VALID_URL = r'''(?x)https?://
(?P<site> (?P<site>
(?:(?:www|go)\.)?discovery| go\.discovery|
(?:www\.)? www\.
(?: (?:
investigationdiscovery| investigationdiscovery|
discoverylife| discoverylife|
@ -22,8 +22,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
ahctv| ahctv|
destinationamerica| destinationamerica|
sciencechannel| sciencechannel|
tlc| tlc
velocity
)| )|
watch\. watch\.
(?: (?:
@ -83,7 +82,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
'authRel': 'authorization', 'authRel': 'authorization',
'client_id': '3020a40c2356a645b4b4', 'client_id': '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, 'redirectUri': 'https://www.discovery.com/',
})['access_token'] })['access_token']
headers = self.geo_verification_headers() headers = self.geo_verification_headers()

View file

@ -512,7 +512,6 @@ from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
from .kusi import KUSIIE from .kusi import KUSIIE
@ -658,7 +657,6 @@ from .mtv import (
MTVJapanIE, MTVJapanIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
from .mwave import MwaveIE, MwaveMeetGreetIE from .mwave import MwaveIE, MwaveMeetGreetIE
from .mychannels import MyChannelsIE from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE
@ -811,7 +809,6 @@ from .packtpub import (
PacktPubIE, PacktPubIE,
PacktPubCourseIE, PacktPubCourseIE,
) )
from .pandatv import PandaTVIE
from .pandoratv import PandoraTVIE from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE from .patreon import PatreonIE
@ -966,7 +963,10 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import ScrippsNetworksWatchIE from .scrippsnetworks import (
ScrippsNetworksWatchIE,
ScrippsNetworksIE,
)
from .scte import ( from .scte import (
SCTEIE, SCTEIE,
SCTECourseIE, SCTECourseIE,
@ -1171,7 +1171,10 @@ from .tv2 import (
TV2ArticleIE, TV2ArticleIE,
KatsomoIE, KatsomoIE,
) )
from .tv2dk import TV2DKIE from .tv2dk import (
TV2DKIE,
TV2DKBornholmPlayIE,
)
from .tv2hu import TV2HuIE from .tv2hu import TV2HuIE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE from .tv5mondeplus import TV5MondePlusIE
@ -1235,7 +1238,10 @@ from .udemy import (
UdemyCourseIE UdemyCourseIE
) )
from .udn import UDNEmbedIE from .udn import UDNEmbedIE
from .ufctv import UFCTVIE from .ufctv import (
UFCTVIE,
UFCArabiaIE,
)
from .uktvplay import UKTVPlayIE from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .dlive import ( from .dlive import (
@ -1289,7 +1295,6 @@ from .videomore import (
VideomoreVideoIE, VideomoreVideoIE,
VideomoreSeasonIE, VideomoreSeasonIE,
) )
from .videopremium import VideoPremiumIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .vidio import VidioIE from .vidio import VidioIE
from .vidlii import VidLiiIE from .vidlii import VidLiiIE

View file

@ -2098,6 +2098,9 @@ class GenericIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Smoky Barbecue Favorites', 'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g', 'thumbnail': r're:^https?://.*\.jpe?g',
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
'upload_date': '20170909',
'timestamp': 1504915200,
}, },
'add_ie': [ZypeIE.ie_key()], 'add_ie': [ZypeIE.ie_key()],
'params': { 'params': {
@ -2960,7 +2963,7 @@ class GenericIE(InfoExtractor):
# Look for VODPlatform embeds # Look for VODPlatform embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1', r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result( return self.url_result(

View file

@ -0,0 +1,133 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
try_get,
)
class ImgGamingBaseIE(InfoExtractor):
    """Base extractor for sites backed by the IMG Gaming (DCE) front office API.

    The base class leaves ``_REALM`` unset; it is concatenated into the
    ``Realm`` header during initialization, so concrete extractors have to
    provide it. ``_VALID_URL`` is read by ``_real_extract`` but is not defined
    here - presumably subclasses build it from ``_VALID_URL_TEMPL``.
    """

    _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
    _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
    # Populated in _real_initialize with realm, API key and bearer token
    _HEADERS = None
    # Sent with manifest requests and attached to every HLS format
    _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
    _REALM = None
    # The single %s placeholder is the domain part of the URL pattern
    _VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'

    def _real_initialize(self):
        """Log in and store the headers used for all subsequent API calls.

        Raises a login-required error when no credentials are available.
        """
        self._HEADERS = {
            'Realm': 'dce.' + self._REALM,
            'x-api-key': self._API_KEY,
        }

        email, password = self._get_login_info()
        if email is None:
            self.raise_login_required()

        # The login request needs a JSON content type on top of the base
        # headers; work on a copy so the shared headers stay untouched.
        p_headers = self._HEADERS.copy()
        p_headers['Content-Type'] = 'application/json'
        self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
            self._API_BASE + 'login',
            None, 'Logging in', data=json.dumps({
                'id': email,
                'secret': password,
            }).encode(), headers=p_headers)['authorisationToken']

    def _call_api(self, path, media_id):
        """Download JSON from ``_API_BASE + path + media_id`` with the stored headers."""
        return self._download_json(
            self._API_BASE + path + media_id, media_id, headers=self._HEADERS)

    def _extract_dve_api_url(self, media_id, media_type):
        """Return the ``playerUrlCallback`` URL of the stream API response.

        ``video`` media is looked up under ``stream/vod/<id>``, anything else
        under ``stream?eventId=<id>``. An HTTP 403 is turned into an expected
        ExtractorError carrying the first message of the JSON error body;
        other errors propagate unchanged.
        """
        stream_path = 'stream'
        if media_type == 'video':
            stream_path += '/vod/'
        else:
            stream_path += '?eventId='
        try:
            return self._call_api(
                stream_path, media_id)['playerUrlCallback']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                raise ExtractorError(
                    self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
                    expected=True)
            raise

    def _real_extract(self, url):
        """Extract a playlist result or a single live/VOD info dict from ``url``."""
        domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups()

        # A video URL may carry a playlistId query parameter; honour
        # --no-playlist, otherwise switch over to extracting the playlist.
        if playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
                media_type, media_id = 'playlist', playlist_id

        if media_type == 'playlist':
            playlist = self._call_api('vod/playlist/', media_id)
            entries = []
            for video in try_get(playlist, lambda x: x['videos']['vods']) or []:
                video_id = str_or_none(video.get('id'))
                if not video_id:
                    continue
                entries.append(self.url_result(
                    'https://%s/video/%s' % (domain, video_id),
                    self.ie_key(), video_id))
            return self.playlist_result(
                entries, media_id, playlist.get('title'),
                playlist.get('description'))

        dve_api_url = self._extract_dve_api_url(media_id, media_type)
        video_data = self._download_json(dve_api_url, media_id)
        is_live = media_type == 'live'
        if is_live:
            # Live titles come from the event endpoint
            title = self._live_title(self._call_api('event/', media_id)['title'])
        else:
            title = video_data['name']

        formats = []
        for proto in ('hls', 'dash'):
            # The URL is either a flat '<proto>Url' field or nested as
            # {'<proto>': {'url': ...}}
            media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
            if not media_url:
                continue
            if proto == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
                    m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
                for f in m3u8_formats:
                    # Carry the manifest headers over to the format itself
                    f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
                    formats.append(f)
            else:
                formats.extend(self._extract_mpd_formats(
                    media_url, media_id, mpd_id='dash', fatal=False,
                    headers=self._MANIFEST_HEADERS))
        self._sort_formats(formats)

        subtitles = {}
        # dict.get's default only applies to a missing key; fall back to an
        # empty list as well when the key is present with a null value.
        for subtitle in video_data.get('subtitles') or []:
            subtitle_url = subtitle.get('url')
            if not subtitle_url:
                continue
            # Same reasoning for 'lang': a null language must not become a
            # None key in the subtitles dict.
            subtitles.setdefault(subtitle.get('lang') or 'en_US', []).append({
                'url': subtitle_url,
            })

        return {
            'id': media_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_data.get('thumbnailUrl'),
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'tags': video_data.get('tags'),
            'is_live': is_live,
            'subtitles': subtitles,
        }

View file

@ -1,73 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
)
class KontrTubeIE(InfoExtractor):
    """Extractor for single video pages on kontrtube.ru.

    All metadata is scraped from the video page HTML with regular
    expressions; only the video URL and the title are mandatory.
    """

    IE_NAME = 'kontrtube'
    IE_DESC = 'KontrTube.ru - Труба зовёт'
    _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'

    _TEST = {
        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
        'md5': '975a991a4926c9a85f383a736a2e6b80',
        'info_dict': {
            'id': '2678',
            'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
            'ext': 'mp4',
            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
            'duration': 270,
        }
    }

    def _real_extract(self, url):
        """Download the video page and return the info dict scraped from it."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(
            url, display_id, 'Downloading page')

        # The '/?' before the closing quote drops an optional trailing slash
        video_url = self._search_regex(
            r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
        thumbnail = self._search_regex(
            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
        title = self._html_search_regex(
            r'(?s)<h2>(.+?)</h2>', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description')

        duration = self._search_regex(
            r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
        if duration:
            # Map the Russian unit abbreviations to the English ones before
            # handing the string to parse_duration
            duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))

        view_count = self._search_regex(
            r'Просмотров: <em>([^<]+)</em>',
            webpage, 'view count', fatal=False)
        if view_count:
            # Spaces are removed from the matched count before conversion
            view_count = int_or_none(view_count.replace(' ', ''))

        # Field label without the stray leading space, so the warning emitted
        # on a failed match reads correctly
        comment_count = int_or_none(self._search_regex(
            r'Комментарии \((\d+)\)<', webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
            # Both counts are already int-or-None at this point
            'view_count': view_count,
            'comment_count': comment_count,
        }

View file

@ -2,23 +2,24 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
unescapeHTML, ExtractorError,
parse_duration, int_or_none,
get_element_by_class, qualities,
) )
class LEGOIE(InfoExtractor): class LEGOIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)' _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]{32})'
_TESTS = [{ _TESTS = [{
'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
'md5': 'f34468f176cfd76488767fc162c405fa', 'md5': 'f34468f176cfd76488767fc162c405fa',
'info_dict': { 'info_dict': {
'id': '55492d823b1b4d5e985787fa8c2973b1', 'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi', 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
@ -26,103 +27,123 @@ class LEGOIE(InfoExtractor):
}, { }, {
# geo-restricted but the contentUrl contain a valid url # geo-restricted but the contentUrl contain a valid url
'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399', 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
'md5': '4c3fec48a12e40c6e5995abc3d36cc2e', 'md5': 'c7420221f7ffd03ff056f9db7f8d807c',
'info_dict': { 'info_dict': {
'id': '13bdc2299ab24d9685701a915b3d71e7', 'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Aflevering 20 - Helden van het koninkrijk', 'title': 'Aflevering 20: Helden van het koninkrijk',
'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941', 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
'age_limit': 5,
}, },
}, { }, {
# special characters in title # with subtitle
'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d', 'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba',
'info_dict': { 'info_dict': {
'id': '9685ee9d12e84ff38e84b4e3d0db533d', 'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Force Surprise LEGO® Star Wars™ Microfighters', 'title': 'De kleine puppy',
'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3', 'description': 'md5:5b725471f849348ac73f2e12cfb4be06',
'age_limit': 1,
'subtitles': {
'nl': [{
'ext': 'srt',
'url': r're:^https://.+\.srt$',
}],
},
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}] }]
_BITRATES = [256, 512, 1024, 1536, 2560] _QUALITIES = {
'Lowest': (64, 180, 320),
'Low': (64, 270, 480),
'Medium': (96, 360, 640),
'High': (128, 540, 960),
'Highest': (128, 720, 1280),
}
def _real_extract(self, url): def _real_extract(self, url):
locale, video_id = re.match(self._VALID_URL, url).groups() locale, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id) countries = [locale.split('-')[1].upper()]
title = get_element_by_class('video-header', webpage).strip() self._initialize_geo_bypass({
progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/' 'countries': countries,
streaming_base = 'http://legoprod-f.akamaihd.net/' })
content_url = self._html_search_meta('contentUrl', webpage)
path = self._search_regex(
r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
content_url, 'video path', default=None)
if not path:
player_url = self._proto_relative_url(self._search_regex(
r'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
webpage, 'player url', default=None))
if not player_url:
base_url = self._proto_relative_url(self._search_regex(
r'data-baseurl="([^"]+)"', webpage, 'base url',
default='http://www.lego.com/%s/mediaplayer/video/' % locale))
player_url = base_url + video_id
player_webpage = self._download_webpage(player_url, video_id)
video_data = self._parse_json(unescapeHTML(self._search_regex(
r"video='([^']+)'", player_webpage, 'video data')), video_id)
progressive_base = self._search_regex(
r'data-video-progressive-url="([^"]+)"',
player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/')
streaming_base = self._search_regex(
r'data-video-streaming-url="([^"]+)"',
player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/')
item_id = video_data['ItemId']
net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) try:
base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) item = self._download_json(
path = '/'.join([net_storage_path, base_path]) # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video
streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) 'https://services.slingshot.lego.com/mediaplayer/v2',
video_id, query={
'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
}, headers=self.geo_verification_headers())
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451:
self.raise_geo_restricted(countries=countries)
raise
formats = self._extract_akamai_formats( video = item['Video']
'%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id) video_id = video['Id']
m3u8_formats = list(filter( title = video['Title']
lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
formats)) q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest'])
if len(m3u8_formats) == len(self._BITRATES): formats = []
self._sort_formats(m3u8_formats) for video_source in item.get('VideoFormats', []):
for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats): video_source_url = video_source.get('Url')
progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate) if not video_source_url:
mp4_f = m3u8_format.copy() continue
mp4_f.update({ video_source_format = video_source.get('Format')
'url': progressive_base_url + 'mp4', if video_source_format == 'F4M':
'format_id': m3u8_format['format_id'].replace('hls', 'mp4'), formats.extend(self._extract_f4m_formats(
'protocol': 'http', video_source_url, video_id,
}) f4m_id=video_source_format, fatal=False))
web_f = { elif video_source_format == 'M3U8':
'url': progressive_base_url + 'webm', formats.extend(self._extract_m3u8_formats(
'format_id': m3u8_format['format_id'].replace('hls', 'webm'), video_source_url, video_id, 'mp4', 'm3u8_native',
'width': m3u8_format['width'], m3u8_id=video_source_format, fatal=False))
'height': m3u8_format['height'], else:
'tbr': m3u8_format.get('tbr'), video_source_quality = video_source.get('Quality')
'ext': 'webm', format_id = []
for v in (video_source_format, video_source_quality):
if v:
format_id.append(v)
f = {
'format_id': '-'.join(format_id),
'quality': q(video_source_quality),
'url': video_source_url,
} }
formats.extend([web_f, mp4_f]) quality = self._QUALITIES.get(video_source_quality)
else: if quality:
for bitrate in self._BITRATES: f.update({
for ext in ('web', 'mp4'): 'abr': quality[0],
formats.append({ 'height': quality[1],
'format_id': '%s-%s' % (ext, bitrate), 'width': quality[2],
'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext), }),
'tbr': bitrate, formats.append(f)
'ext': ext,
})
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
sub_file_id = video.get('SubFileId')
if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000':
net_storage_path = video.get('NetstoragePath')
invariant_id = video.get('InvariantId')
video_file_id = video.get('VideoFileId')
video_version = video.get('VideoVersion')
if net_storage_path and invariant_id and video_file_id and video_version:
subtitles.setdefault(locale[:2], []).append({
'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version),
})
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': self._html_search_meta('description', webpage), 'description': video.get('Description'),
'thumbnail': self._html_search_meta('thumbnail', webpage), 'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'),
'duration': parse_duration(self._html_search_meta('duration', webpage)), 'duration': int_or_none(video.get('Length')),
'formats': formats, 'formats': formats,
'subtitles': subtitles,
'age_limit': int_or_none(video.get('AgeFrom')),
'season': video.get('SeasonTitle'),
'season_number': int_or_none(video.get('Season')) or None,
'episode_number': int_or_none(video.get('Episode')) or None,
} }

View file

@ -20,10 +20,10 @@ class MailRuIE(InfoExtractor):
IE_DESC = 'Видео@Mail.Ru' IE_DESC = 'Видео@Mail.Ru'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:(?:www|m)\.)?my\.mail\.ru/ (?:(?:www|m)\.)?my\.mail\.ru/+
(?: (?:
video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)| video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html| (?:(?P<idv2prefix>(?:[^/]+/+){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
(?:video/embed|\+/video/meta)/(?P<metaid>\d+) (?:video/embed|\+/video/meta)/(?P<metaid>\d+)
) )
''' '''
@ -85,6 +85,14 @@ class MailRuIE(InfoExtractor):
{ {
'url': 'http://my.mail.ru/+/video/meta/7949340477499637815', 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
'only_matching': True, 'only_matching': True,
},
{
'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html',
'only_matching': True,
},
{
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
'only_matching': True,
} }
] ]
@ -237,7 +245,7 @@ class MailRuMusicSearchBaseIE(InfoExtractor):
class MailRuMusicIE(MailRuMusicSearchBaseIE): class MailRuMusicIE(MailRuMusicSearchBaseIE):
IE_NAME = 'mailru:music' IE_NAME = 'mailru:music'
IE_DESC = 'Музыка@Mail.Ru' IE_DESC = 'Музыка@Mail.Ru'
_VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)' _VALID_URL = r'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P<id>[\da-f]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893', 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
'md5': '0f8c22ef8c5d665b13ac709e63025610', 'md5': '0f8c22ef8c5d665b13ac709e63025610',
@ -273,7 +281,7 @@ class MailRuMusicIE(MailRuMusicSearchBaseIE):
class MailRuMusicSearchIE(MailRuMusicSearchBaseIE): class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
IE_NAME = 'mailru:music:search' IE_NAME = 'mailru:music:search'
IE_DESC = 'Музыка@Mail.Ru' IE_DESC = 'Музыка@Mail.Ru'
_VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://my\.mail\.ru/+music/+search/+(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://my.mail.ru/music/search/black%20shadow', 'url': 'https://my.mail.ru/music/search/black%20shadow',
'info_dict': { 'info_dict': {

View file

@ -123,7 +123,7 @@ class MediasetIE(ThePlatformBaseIE):
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
for video in smil.findall(self._xpath_ns('.//video', namespace)): for video in smil.findall(self._xpath_ns('.//video', namespace)):
video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src']) video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
return super()._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url) return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
def _real_extract(self, url): def _real_extract(self, url):
guid = self._match_id(url) guid = self._match_id(url)

View file

@ -4,8 +4,8 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601,
smuggle_url, smuggle_url,
parse_duration,
) )
@ -18,16 +18,18 @@ class MiTeleIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg', 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tor, la web invisible', 'title': 'Diario de La redacción Programa 144',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'description': 'md5:07c35a7b11abb05876a6a79185b58d27',
'series': 'Diario de', 'series': 'Diario de',
'season': 'La redacción', 'season': 'Season 14',
'season_number': 14, 'season_number': 14,
'season_id': 'diario_de_t14_11981', 'episode': 'Tor, la web invisible',
'episode': 'Programa 144',
'episode_number': 3, 'episode_number': 3,
'thumbnail': r're:(?i)^https?://.*\.jpg$', 'thumbnail': r're:(?i)^https?://.*\.jpg$',
'duration': 2913, 'duration': 2913,
'age_limit': 16,
'timestamp': 1471209401,
'upload_date': '20160814',
}, },
'add_ie': ['Ooyala'], 'add_ie': ['Ooyala'],
}, { }, {
@ -39,13 +41,15 @@ class MiTeleIE(InfoExtractor):
'title': 'Cuarto Milenio Temporada 6 Programa 226', 'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio', 'series': 'Cuarto Milenio',
'season': 'Temporada 6', 'season': 'Season 6',
'season_number': 6, 'season_number': 6,
'season_id': 'cuarto_milenio_t06_12715', 'episode': 'Episode 24',
'episode': 'Programa 226',
'episode_number': 24, 'episode_number': 24,
'thumbnail': r're:(?i)^https?://.*\.jpg$', 'thumbnail': r're:(?i)^https?://.*\.jpg$',
'duration': 7313, 'duration': 7313,
'age_limit': 12,
'timestamp': 1471209021,
'upload_date': '20160814',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -54,67 +58,36 @@ class MiTeleIE(InfoExtractor):
}, { }, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144-40_1006364575251/player/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
paths = self._download_json( pre_player = self._parse_json(self._search_regex(
'https://www.mitele.es/amd/agp/web/metadata/general_configuration', r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
video_id, 'Downloading paths JSON') webpage, 'Pre Player'), display_id)['prePlayer']
title = pre_player['title']
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] video = pre_player['video']
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com') video_id = video['dataMediaId']
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/') content = pre_player.get('content') or {}
source = self._download_json( info = content.get('info') or {}
'%s://%s%s%s/docs/%s' % (
ooyala_s.get('protocol', 'https'), base_url, full_path,
ooyala_s.get('provider_id', '104951'), video_id),
video_id, 'Downloading data JSON', query={
'include_titles': 'Series,Season',
'product_name': ooyala_s.get('product_name', 'test'),
'format': 'full',
})['hits']['hits'][0]['_source']
embedCode = source['offers'][0]['embed_codes'][0]
titles = source['localizable_titles'][0]
title = titles.get('title_medium') or titles['title_long']
description = titles.get('summary_long') or titles.get('summary_medium')
def get(key1, key2):
value1 = source.get(key1)
if not value1 or not isinstance(value1, list):
return
if not isinstance(value1[0], dict):
return
return value1[0].get(key2)
series = get('localizable_titles_series', 'title_medium')
season = get('localizable_titles_season', 'title_medium')
season_number = int_or_none(source.get('season_number'))
season_id = source.get('season_id')
episode = titles.get('title_sort_name')
episode_number = int_or_none(source.get('episode_number'))
duration = parse_duration(get('videos', 'duration'))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
# for some reason only HLS is supported # for some reason only HLS is supported
'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}), 'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': info.get('synopsis'),
'series': series, 'series': content.get('title'),
'season': season, 'season_number': int_or_none(info.get('season_number')),
'season_number': season_number, 'episode': content.get('subtitle'),
'season_id': season_id, 'episode_number': int_or_none(info.get('episode_number')),
'episode': episode, 'duration': int_or_none(info.get('duration')),
'episode_number': episode_number, 'thumbnail': video.get('dataPoster'),
'duration': duration, 'age_limit': int_or_none(info.get('rating')),
'thumbnail': get('images', 'url'), 'timestamp': parse_iso8601(pre_player.get('publishedTime')),
} }

View file

@ -1,66 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
)
class MusicPlayOnIE(InfoExtractor):
    """Extractor for single videos served by musicplayon.com player pages."""

    # Matches the plain player form (?v=ID) as well as the playlist form
    # (?pl=N&play=ID), on any subdomain and on the "play-touch" page.
    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://en.musicplayon.com/play?v=433377',
        'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
    }, {
        'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
        'only_matching': True,
    }]

    # Whatever URL shape was matched, the page actually downloaded is this one.
    _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'

    def _real_extract(self, url):
        """Fetch the player page for the matched id and return its info dict.

        Metadata is read from the page's Open Graph / meta tags; the media
        URLs come from the JavaScript ``setup['_sources']`` assignment.
        """
        video_id = self._match_id(url)
        # Rebind ``url`` to the normalised page; relative source paths are
        # resolved against it further down.
        url = self._URL_TEMPLATE % video_id
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        duration = int_or_none(self._html_search_meta(
            'video:duration', webpage, 'duration', fatal=False))
        view_count = int_or_none(self._og_search_property(
            'count', webpage, fatal=False))
        uploader = self._html_search_regex(
            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>',
            webpage, 'uploader', fatal=False)

        # The source list is a JS literal, hence the js_to_json transform.
        sources_js = self._search_regex(
            r'setup\[\'_sources\'\]\s*=\s*([^;]+);', webpage, 'video sources')
        sources = self._parse_json(
            sources_js, video_id, transform_source=js_to_json)

        formats = []
        for source in sources:
            formats.append({
                'url': compat_urlparse.urljoin(url, source['src']),
                'ext': mimetype2ext(source.get('type')),
                'format_note': source.get('data-res'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }

View file

@ -1,68 +1,33 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
dict_get,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration,
try_get,
update_url_query, update_url_query,
) )
class NaverIE(InfoExtractor): class NaverBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)' _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
_TESTS = [{ def _extract_video_info(self, video_id, vid, key):
'url': 'http://tv.naver.com/v/81652',
'info_dict': {
'id': '81652',
'ext': 'mp4',
'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
'upload_date': '20130903',
},
}, {
'url': 'http://tv.naver.com/v/395837',
'md5': '638ed4c12012c458fefcddfd01f173cd',
'info_dict': {
'id': '395837',
'ext': 'mp4',
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
'upload_date': '20150519',
},
'skip': 'Georestricted',
}, {
'url': 'http://tvcast.naver.com/v/81652',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
vid = self._search_regex(
r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'video id', fatal=None, group='value')
in_key = self._search_regex(
r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'key', default=None, group='value')
if not vid or not in_key:
error = self._html_search_regex(
r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
webpage, 'error', default=None)
if error:
raise ExtractorError(error, expected=True)
raise ExtractorError('couldn\'t extract vid and key')
video_data = self._download_json( video_data = self._download_json(
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid, 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
video_id, query={ video_id, query={
'key': in_key, 'key': key,
}) })
meta = video_data['meta'] meta = video_data['meta']
title = meta['subject'] title = meta['subject']
formats = [] formats = []
get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or []
def extract_formats(streams, stream_type, query={}): def extract_formats(streams, stream_type, query={}):
for stream in streams: for stream in streams:
@ -73,7 +38,7 @@ class NaverIE(InfoExtractor):
encoding_option = stream.get('encodingOption', {}) encoding_option = stream.get('encodingOption', {})
bitrate = stream.get('bitrate', {}) bitrate = stream.get('bitrate', {})
formats.append({ formats.append({
'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')), 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
'url': stream_url, 'url': stream_url,
'width': int_or_none(encoding_option.get('width')), 'width': int_or_none(encoding_option.get('width')),
'height': int_or_none(encoding_option.get('height')), 'height': int_or_none(encoding_option.get('height')),
@ -83,7 +48,7 @@ class NaverIE(InfoExtractor):
'protocol': 'm3u8_native' if stream_type == 'HLS' else None, 'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
}) })
extract_formats(video_data.get('videos', {}).get('list', []), 'H264') extract_formats(get_list('video'), 'H264')
for stream_set in video_data.get('streams', []): for stream_set in video_data.get('streams', []):
query = {} query = {}
for param in stream_set.get('keys', []): for param in stream_set.get('keys', []):
@ -101,28 +66,101 @@ class NaverIE(InfoExtractor):
'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False)) 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
def get_subs(caption_url):
if re.search(self._CAPTION_EXT_RE, caption_url):
return [{
'url': replace_ext(caption_url, 'ttml'),
}, {
'url': replace_ext(caption_url, 'vtt'),
}]
else:
return [{'url': caption_url}]
automatic_captions = {}
subtitles = {} subtitles = {}
for caption in video_data.get('captions', {}).get('list', []): for caption in get_list('caption'):
caption_url = caption.get('source') caption_url = caption.get('source')
if not caption_url: if not caption_url:
continue continue
subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({ sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles
'url': caption_url, sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url))
})
upload_date = self._search_regex( user = meta.get('user', {})
r'<span[^>]+class="date".*?(\d{4}\.\d{2}\.\d{2})',
webpage, 'upload date', fatal=False)
if upload_date:
upload_date = upload_date.replace('.', '')
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'description': self._og_search_description(webpage), 'automatic_captions': automatic_captions,
'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage), 'thumbnail': try_get(meta, lambda x: x['cover']['source']),
'view_count': int_or_none(meta.get('count')), 'view_count': int_or_none(meta.get('count')),
'upload_date': upload_date, 'uploader_id': user.get('id'),
'uploader': user.get('name'),
'uploader_url': user.get('url'),
} }
class NaverIE(NaverBaseIE):
    """Extractor for tv.naver.com clip pages (``/v/<id>`` and ``/embed/<id>``)."""

    _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
    _GEO_BYPASS = False
    _TESTS = [{
        'url': 'http://tv.naver.com/v/81652',
        'info_dict': {
            'id': '81652',
            'ext': 'mp4',
            'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            'timestamp': 1378200754,
            'upload_date': '20130903',
            'uploader': '메가스터디, 합격불변의 법칙',
            'uploader_id': 'megastudy',
        },
    }, {
        'url': 'http://tv.naver.com/v/395837',
        'md5': '8a38e35354d26a17f73f4e90094febd3',
        'info_dict': {
            'id': '395837',
            'ext': 'mp4',
            'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
            'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
            'timestamp': 1432030253,
            'upload_date': '20150519',
            'uploader': '4가지쇼 시즌2',
            'uploader_id': 'wrappinguser29',
        },
        'skip': 'Georestricted',
    }, {
        'url': 'http://tvcast.naver.com/v/81652',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a clip page to its player ids and return the merged info dict.

        The clip JSON endpoint supplies ``videoId`` and ``inKey``; these are
        handed to ``_extract_video_info`` and the result is extended with the
        clip-level fields read from the same JSON.

        Raises ExtractorError when the ids are missing; before that, the
        ``notCountry`` / ``notLogin`` auth states are reported through
        ``raise_geo_restricted`` / ``raise_login_required``.
        """
        video_id = self._match_id(url)
        api_response = self._download_json(
            'https://tv.naver.com/api/json/v/' + video_id,
            video_id, headers=self.geo_verification_headers())

        # Either level may be absent or null in the response; fall back to
        # empty dicts so the lookups below stay simple.
        player_info = api_response.get('playerInfoJson') or {}
        clip = player_info.get('currentClip') or {}

        vid, in_key = clip.get('videoId'), clip.get('inKey')
        if not (vid and in_key):
            auth_state = try_get(
                player_info, lambda x: x['playerOption']['auth'])
            # The two states are mutually exclusive, so plain ifs suffice.
            if auth_state == 'notCountry':
                self.raise_geo_restricted(countries=['KR'])
            if auth_state == 'notLogin':
                self.raise_login_required()
            raise ExtractorError('couldn\'t extract vid and key')

        info = self._extract_video_info(video_id, vid, in_key)
        info['description'] = clean_html(clip.get('description'))
        # firstExposureTime is divided by 1000 before use.
        info['timestamp'] = int_or_none(clip.get('firstExposureTime'), 1000)
        info['duration'] = parse_duration(clip.get('displayPlayTime'))
        info['like_count'] = int_or_none(clip.get('recommendPoint'))
        info['age_limit'] = 19 if clip.get('adult') else None
        return info

View file

@ -87,11 +87,25 @@ class NBCIE(AdobePassIE):
def _real_extract(self, url): def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups() permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + compat_urllib_parse_unquote(permalink) permalink = 'http' + compat_urllib_parse_unquote(permalink)
response = self._download_json( video_data = self._download_json(
'https://friendship.nbc.co/v2/graphql', video_id, query={ 'https://friendship.nbc.co/v2/graphql', video_id, query={
'query': '''{ 'query': '''query bonanzaPage(
page(name: "%s", platform: web, type: VIDEO, userId: "0") { $app: NBCUBrands! = nbc
data { $name: String!
$oneApp: Boolean
$platform: SupportedPlatforms! = web
$type: EntityPageType! = VIDEO
$userId: String!
) {
bonanzaPage(
app: $app
name: $name
oneApp: $oneApp
platform: $platform
type: $type
userId: $userId
) {
metadata {
... on VideoPageData { ... on VideoPageData {
description description
episodeNumber episodeNumber
@ -100,15 +114,20 @@ class NBCIE(AdobePassIE):
mpxAccountId mpxAccountId
mpxGuid mpxGuid
rating rating
resourceId
seasonNumber seasonNumber
secondaryTitle secondaryTitle
seriesShortTitle seriesShortTitle
} }
} }
} }
}''' % permalink, }''',
}) 'variables': json.dumps({
video_data = response['data']['page']['data'] 'name': permalink,
'oneApp': True,
'userId': '0',
}),
})['data']['bonanzaPage']['metadata']
query = { query = {
'mbr': 'true', 'mbr': 'true',
'manifest': 'm3u', 'manifest': 'm3u',
@ -117,8 +136,8 @@ class NBCIE(AdobePassIE):
title = video_data['secondaryTitle'] title = video_data['secondaryTitle']
if video_data.get('locked'): if video_data.get('locked'):
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
'nbcentertainment', title, video_id, video_data.get('resourceId') or 'nbcentertainment',
video_data.get('rating')) title, video_id, video_data.get('rating'))
query['auth'] = self._extract_mvpd_auth( query['auth'] = self._extract_mvpd_auth(
url, video_id, 'nbcentertainment', resource) url, video_id, 'nbcentertainment', resource)
theplatform_url = smuggle_url(update_url_query( theplatform_url = smuggle_url(update_url_query(

View file

@ -9,6 +9,8 @@ from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
qualities, qualities,
try_get,
urljoin,
) )
@ -220,11 +222,17 @@ class NDREmbedBaseIE(InfoExtractor):
upload_date = ppjson.get('config', {}).get('publicationDate') upload_date = ppjson.get('config', {}).get('publicationDate')
duration = int_or_none(config.get('duration')) duration = int_or_none(config.get('duration'))
thumbnails = [{ thumbnails = []
'id': thumbnail.get('quality') or thumbnail_id, poster = try_get(config, lambda x: x['poster'], dict) or {}
'url': thumbnail['src'], for thumbnail_id, thumbnail in poster.items():
'preference': quality_key(thumbnail.get('quality')), thumbnail_url = urljoin(url, thumbnail.get('src'))
} for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')] if not thumbnail_url:
continue
thumbnails.append({
'id': thumbnail.get('quality') or thumbnail_id,
'url': thumbnail_url,
'preference': quality_key(thumbnail.get('quality')),
})
return { return {
'id': video_id, 'id': video_id,

View file

@ -12,6 +12,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
JSON_LD_RE, JSON_LD_RE,
js_to_json,
NO_DEFAULT, NO_DEFAULT,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
@ -105,6 +106,7 @@ class NRKBaseIE(InfoExtractor):
MESSAGES = { MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut', 'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
} }
message_type = data.get('messageType', '') message_type = data.get('messageType', '')
@ -255,6 +257,17 @@ class NRKTVIE(NRKBaseIE):
''' % _EPISODE_RE ''' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{ _TESTS = [{
'url': 'https://tv.nrk.no/program/MDDP12000117',
'md5': '8270824df46ec629b66aeaa5796b36fb',
'info_dict': {
'id': 'MDDP12000117AA',
'ext': 'mp4',
'title': 'Alarm Trolltunga',
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
'duration': 2223,
'age_limit': 6,
},
}, {
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '9a167e54d04671eb6317a37b7bc8a280', 'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': { 'info_dict': {
@ -266,6 +279,7 @@ class NRKTVIE(NRKBaseIE):
'series': '20 spørsmål', 'series': '20 spørsmål',
'episode': '23.05.2014', 'episode': '23.05.2014',
}, },
'skip': 'NoProgramRights',
}, { }, {
'url': 'https://tv.nrk.no/program/mdfp15000514', 'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': { 'info_dict': {
@ -370,7 +384,24 @@ class NRKTVIE(NRKBaseIE):
class NRKTVEpisodeIE(InfoExtractor): class NRKTVEpisodeIE(InfoExtractor):
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)' _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
_TEST = { _TESTS = [{
'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
'info_dict': {
'id': 'MUHH36005220BA',
'ext': 'mp4',
'title': 'Kro, krig og kjærlighet 2:6',
'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
'duration': 1563,
'series': 'Hellums kro',
'season_number': 1,
'episode_number': 2,
'episode': '2:6',
'age_limit': 6,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8', 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
'info_dict': { 'info_dict': {
'id': 'MSUI14000816AA', 'id': 'MSUI14000816AA',
@ -386,7 +417,8 @@ class NRKTVEpisodeIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} 'skip': 'ProgramRightsHasExpired',
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@ -409,7 +441,7 @@ class NRKTVSerieBaseIE(InfoExtractor):
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'), r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT), webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False) display_id, fatal=False, transform_source=js_to_json)
if not config: if not config:
return return
return try_get( return try_get(
@ -479,6 +511,14 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://tv.nrk.no/serie/blank',
'info_dict': {
'id': 'blank',
'title': 'Blank',
'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
},
'playlist_mincount': 30,
}, {
# new layout, seasons # new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage', 'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': { 'info_dict': {
@ -648,7 +688,7 @@ class NRKSkoleIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099', 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6', 'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
'info_dict': { 'info_dict': {
'id': '6021', 'id': '6021',
'ext': 'mp4', 'ext': 'mp4',

View file

@ -6,12 +6,14 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
float_or_none, float_or_none,
HEADRequest, HEADRequest,
int_or_none, int_or_none,
orderedSet, orderedSet,
remove_end, remove_end,
str_or_none,
strip_jsonp, strip_jsonp,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@ -88,8 +90,11 @@ class ORFTVthekIE(InfoExtractor):
format_id = '-'.join(format_id_list) format_id = '-'.join(format_id_list)
ext = determine_ext(src) ext = determine_ext(src)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id=format_id, fatal=False)) src, video_id, 'mp4', m3u8_id=format_id, fatal=False)
if any('/geoprotection' in f['url'] for f in m3u8_formats):
self.raise_geo_restricted()
formats.extend(m3u8_formats)
elif ext == 'f4m': elif ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id=format_id, fatal=False)) src, video_id, f4m_id=format_id, fatal=False))
@ -161,44 +166,48 @@ class ORFRadioIE(InfoExtractor):
show_date = mobj.group('date') show_date = mobj.group('date')
show_id = mobj.group('show') show_id = mobj.group('show')
if station == 'fm4':
show_id = '4%s' % show_id
data = self._download_json( data = self._download_json(
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date), 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
show_id % (station, show_id, show_date), show_id)
)
def extract_entry_dict(info, title, subtitle): entries = []
return { for info in data['streams']:
'id': info['loopStreamId'].replace('.mp3', ''), loop_stream_id = str_or_none(info.get('loopStreamId'))
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']), if not loop_stream_id:
continue
title = str_or_none(data.get('title'))
if not title:
continue
start = int_or_none(info.get('start'), scale=1000)
end = int_or_none(info.get('end'), scale=1000)
duration = end - start if end and start else None
entries.append({
'id': loop_stream_id.replace('.mp3', ''),
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id),
'title': title, 'title': title,
'description': subtitle, 'description': clean_html(data.get('subtitle')),
'duration': (info['end'] - info['start']) / 1000, 'duration': duration,
'timestamp': info['start'] / 1000, 'timestamp': start,
'ext': 'mp3', 'ext': 'mp3',
'series': data.get('programTitle') 'series': data.get('programTitle'),
} })
entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': show_id, 'id': show_id,
'title': data['title'], 'title': data.get('title'),
'description': data['subtitle'], 'description': clean_html(data.get('subtitle')),
'entries': entries 'entries': entries,
} }
class ORFFM4IE(ORFRadioIE): class ORFFM4IE(ORFRadioIE):
IE_NAME = 'orf:fm4' IE_NAME = 'orf:fm4'
IE_DESC = 'radio FM4' IE_DESC = 'radio FM4'
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
_TEST = { _TEST = {
'url': 'http://fm4.orf.at/player/20170107/CC', 'url': 'http://fm4.orf.at/player/20170107/4CC',
'md5': '2b0be47375432a7ef104453432a19212', 'md5': '2b0be47375432a7ef104453432a19212',
'info_dict': { 'info_dict': {
'id': '2017-01-07_2100_tl_54_7DaysSat18_31295', 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
@ -209,7 +218,8 @@ class ORFFM4IE(ORFRadioIE):
'timestamp': 1483819257, 'timestamp': 1483819257,
'upload_date': '20170107', 'upload_date': '20170107',
}, },
'skip': 'Shows from ORF radios are only available for 7 days.' 'skip': 'Shows from ORF radios are only available for 7 days.',
'only_matching': True,
} }

View file

@ -1,99 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
qualities,
)
class PandaTVIE(InfoExtractor):
    """Extractor for live rooms on panda.tv."""

    IE_DESC = '熊猫TV'
    _VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.panda.tv/66666',
        'info_dict': {
            'id': '66666',
            'title': 're:.+',
            'uploader': '刘杀鸡',
            'ext': 'flv',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Live stream is offline',
    }, {
        'url': 'https://www.panda.tv/66666',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the room API and build HLS/FLV formats for a live room.

        Raises ExtractorError (expected) when the API reports a non-zero
        ``errno`` or when the room status is anything other than '2'.
        """
        video_id = self._match_id(url)

        response = self._download_json(
            'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)

        errno = response.get('errno', 0)
        if errno != 0:
            message = '%s returned error %s: %s' % (
                self.IE_NAME, errno, response['errmsg'])
            raise ExtractorError(message, expected=True)

        room = response['data']
        video_info = room['videoinfo']

        # 2 = live, 3 = offline
        if video_info.get('status') != '2':
            raise ExtractorError('Live stream is offline', expected=True)

        title = room['roominfo']['name']
        uploader = room.get('hostinfo', {}).get('name')
        room_key = video_info['room_key']
        # When the API omits stream_addr, every known quality is treated as
        # enabled.
        stream_addr = video_info.get(
            'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})

        # Reverse engineered from web player swf
        # (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
        # writing).
        flag_head, flag_tail = video_info['plflag'].split('_')
        flag_head = int(flag_head) - 1
        if flag_tail == '21':
            flag_head, flag_tail = 10, '4'
        live_panda = '' if flag_head >= 1 else 'live_panda'

        auth = self._parse_json(video_info['plflag_list'], video_id)['auth']
        sign = auth['sign']
        ts = auth['time']
        rid = auth['rid']

        rank_of = qualities(['OD', 'HD', 'SD'])
        suffix = ['_small', '_mid', '']
        # (extension, host-name infix) pairs; the position in this tuple is
        # used as source_preference.
        variants = (('m3u8', '-hls'), ('flv', ''))

        formats = []
        for quality_name, enabled in stream_addr.items():
            rank = rank_of(quality_name)
            # Only enabled streams whose rank is strictly positive are used.
            if enabled != '1' or rank <= 0:
                continue
            formats.extend([{
                'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
                % (pl, flag_tail, room_key, live_panda, suffix[rank], ext, sign, ts, rid),
                'format_id': '%s-%s' % (quality_name, ext),
                'quality': rank,
                'source_preference': pref,
            } for pref, (ext, pl) in enumerate(variants)])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._live_title(title),
            'uploader': uploader,
            'formats': formats,
            'is_live': True,
        }

View file

@ -17,6 +17,7 @@ from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
NO_DEFAULT,
orderedSet, orderedSet,
remove_quotes, remove_quotes,
str_to_int, str_to_int,
@ -227,12 +228,13 @@ class PornHubIE(PornHubBaseIE):
else: else:
thumbnail, duration = [None] * 2 thumbnail, duration = [None] * 2
if not video_urls: def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
tv_webpage = dl_webpage('tv')
assignments = self._search_regex( assignments = self._search_regex(
r'(var.+?mediastring.+?)</script>', tv_webpage, pattern, webpage, 'encoded url', default=default)
'encoded url').split(';') if not assignments:
return {}
assignments = assignments.split(';')
js_vars = {} js_vars = {}
@ -254,11 +256,35 @@ class PornHubIE(PornHubBaseIE):
assn = re.sub(r'var\s+', '', assn) assn = re.sub(r'var\s+', '', assn)
vname, value = assn.split('=', 1) vname, value = assn.split('=', 1)
js_vars[vname] = parse_js_value(value) js_vars[vname] = parse_js_value(value)
return js_vars
video_url = js_vars['mediastring'] def add_video_url(video_url):
if video_url not in video_urls_set: v_url = url_or_none(video_url)
video_urls.append((video_url, None)) if not v_url:
video_urls_set.add(video_url) return
if v_url in video_urls_set:
return
video_urls.append((v_url, None))
video_urls_set.add(v_url)
if not video_urls:
FORMAT_PREFIXES = ('media', 'quality')
js_vars = extract_js_vars(
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
default=None)
if js_vars:
for key, format_url in js_vars.items():
if any(key.startswith(p) for p in FORMAT_PREFIXES):
add_video_url(format_url)
if not video_urls and re.search(
r'<[^>]+\bid=["\']lockedPlayer', webpage):
raise ExtractorError(
'Video %s is locked' % video_id, expected=True)
if not video_urls:
js_vars = extract_js_vars(
dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
add_video_url(js_vars['mediastring'])
for mobj in re.finditer( for mobj in re.finditer(
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
@ -276,10 +302,16 @@ class PornHubIE(PornHubBaseIE):
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date: if upload_date:
upload_date = upload_date.replace('/', '') upload_date = upload_date.replace('/', '')
if determine_ext(video_url) == 'mpd': ext = determine_ext(video_url)
if ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False)) video_url, video_id, mpd_id='dash', fatal=False))
continue continue
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
tbr = None tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj: if mobj:

View file

@ -16,7 +16,7 @@ from ..utils import (
class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1BaseIE(InfoExtractor):
_GEO_COUNTRIES = ['DE'] _GEO_BYPASS = False
_ACCESS_ID = None _ACCESS_ID = None
_SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
_V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
@ -39,14 +39,18 @@ class ProSiebenSat1BaseIE(InfoExtractor):
formats = [] formats = []
if self._ACCESS_ID: if self._ACCESS_ID:
raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
server_token = (self._download_json( protocols = self._download_json(
self._V4_BASE_URL + 'protocols', clip_id, self._V4_BASE_URL + 'protocols', clip_id,
'Downloading protocols JSON', 'Downloading protocols JSON',
headers=self.geo_verification_headers(), query={ headers=self.geo_verification_headers(), query={
'access_id': self._ACCESS_ID, 'access_id': self._ACCESS_ID,
'client_token': sha1((raw_ct).encode()).hexdigest(), 'client_token': sha1((raw_ct).encode()).hexdigest(),
'video_id': clip_id, 'video_id': clip_id,
}, fatal=False) or {}).get('server_token') }, fatal=False, expected_status=(403,)) or {}
error = protocols.get('error') or {}
if error.get('title') == 'Geo check failed':
self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
server_token = protocols.get('server_token')
if server_token: if server_token:
urls = (self._download_json( urls = (self._download_json(
self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={

View file

@ -43,8 +43,15 @@ class RedTubeIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.redtube.com/%s' % video_id, video_id) 'http://www.redtube.com/%s' % video_id, video_id)
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): ERRORS = (
raise ExtractorError('Video %s has been removed' % video_id, expected=True) (('video-deleted-info', '>This video has been removed'), 'has been removed'),
(('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
)
for patterns, message in ERRORS:
if any(p in webpage for p in patterns):
raise ExtractorError(
'Video %s %s' % (video_id, message), expected=True)
info = self._search_json_ld(webpage, video_id, default={}) info = self._search_json_ld(webpage, video_id, default={})

View file

@ -165,7 +165,8 @@ class SafariIE(SafariBaseIE):
kaltura_session = self._download_json( kaltura_session = self._download_json(
'%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
video_id, 'Downloading kaltura session JSON', video_id, 'Downloading kaltura session JSON',
'Unable to download kaltura session JSON', fatal=False) 'Unable to download kaltura session JSON', fatal=False,
headers={'Accept': 'application/json'})
if kaltura_session: if kaltura_session:
session = kaltura_session.get('session') session = kaltura_session.get('session')
if session: if session:

View file

@ -7,6 +7,7 @@ import re
from .aws import AWSIE from .aws import AWSIE
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .common import InfoExtractor
from ..utils import ( from ..utils import (
smuggle_url, smuggle_url,
urlencode_postdata, urlencode_postdata,
@ -102,3 +103,50 @@ class ScrippsNetworksWatchIE(AWSIE):
'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
{'geo_countries': ['US']}), {'geo_countries': ['US']}),
AnvatoIE.ie_key(), video_id=mcp_id) AnvatoIE.ie_key(), video_id=mcp_id)
class ScrippsNetworksIE(InfoExtractor):
    # Matches /videos/<slug>-<numeric id> pages on the listed sites and
    # captures both the site name and the numeric id.
    _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
        'info_dict': {
            'id': '0260338',
            'ext': 'mp4',
            'title': 'The Best of the Best',
            'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
            'timestamp': 1475678834,
            'upload_date': '20161005',
            'uploader': 'SCNI-SCND',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
        'only_matching': True,
    }, {
        'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
        'only_matching': True,
    }, {
        'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
        'only_matching': True,
    }, {
        'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
        'only_matching': True,
    }, {
        'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
        'only_matching': True,
    }]
    # Site name (as captured by _VALID_URL) -> numeric value substituted
    # into the first placeholder of _TP_TEMPL.
    _ACCOUNT_MAP = {
        'cookingchanneltv': 2433005105,
        'discovery': 2706091867,
        'diynetwork': 2433004575,
        'foodnetwork': 2433005105,
        'hgtv': 2433004575,
        'travelchannel': 2433005739,
    }
    _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'

    def _real_extract(self, url):
        """Delegate to the 'ThePlatform' extractor for the matched video.

        The site name picks the account number and the numeric id from the
        URL is used as the GUID; the resulting link is smuggled with
        ``force_smil_url`` enabled.
        """
        mobj = re.match(self._VALID_URL, url)
        site = mobj.group('site')
        guid = mobj.group('id')
        tp_url = self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid)
        return self.url_result(
            smuggle_url(tp_url, {'force_smil_url': True}),
            'ThePlatform', guid)

View file

@ -2,7 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import smuggle_url
class SlidesLiveIE(InfoExtractor): class SlidesLiveIE(InfoExtractor):
@ -14,9 +14,9 @@ class SlidesLiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'LMtgR8ba0b0', 'id': 'LMtgR8ba0b0',
'ext': 'mp4', 'ext': 'mp4',
'title': '38902413: external video', 'title': 'GCC IA16 backend',
'description': '3890241320170925-9-1yd6ech.mp4', 'description': 'Watch full version of this video at https://slideslive.com/38902413.',
'uploader': 'SlidesLive Administrator', 'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'upload_date': '20170925', 'upload_date': '20170925',
} }
@ -24,16 +24,38 @@ class SlidesLiveIE(InfoExtractor):
# video_service_name = youtube # video_service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
'only_matching': True, 'only_matching': True,
}, {
# video_service_name = url
'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
'only_matching': True,
}, {
# video_service_name = vimeo
'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
url, video_id, headers={'Accept': 'application/json'}) 'https://ben.slideslive.com/player/' + video_id, video_id)
service_name = video_data['video_service_name'].lower() service_name = video_data['video_service_name'].lower()
if service_name == 'youtube': assert service_name in ('url', 'vimeo', 'youtube')
yt_video_id = video_data['video_service_id'] service_id = video_data['video_service_id']
return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) info = {
'id': video_id,
'thumbnail': video_data.get('thumbnail'),
'url': service_id,
}
if service_name == 'url':
info['title'] = video_data['title']
else: else:
raise ExtractorError( info.update({
'Unsupported service name: {0}'.format(service_name), expected=True) '_type': 'url_transparent',
'ie_key': service_name.capitalize(),
'title': video_data.get('title'),
})
if service_name == 'vimeo':
info['url'] = smuggle_url(
'https://player.vimeo.com/video/' + service_id,
{'http_headers': {'Referer': url}})
return info

View file

@ -9,10 +9,13 @@ from .common import (
SearchInfoExtractor SearchInfoExtractor
) )
from ..compat import ( from ..compat import (
compat_HTTPError,
compat_kwargs,
compat_str, compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
error_to_compat_str,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
HEADRequest, HEADRequest,
@ -28,7 +31,12 @@ from ..utils import (
class SoundcloudEmbedIE(InfoExtractor): class SoundcloudEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P<id>.*)' _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
_TEST = {
# from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
'only_matching': True,
}
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
@ -37,8 +45,13 @@ class SoundcloudEmbedIE(InfoExtractor):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result(compat_urlparse.parse_qs( query = compat_urlparse.parse_qs(
compat_urlparse.urlparse(url).query)['url'][0]) compat_urlparse.urlparse(url).query)
api_url = query['url'][0]
secret_token = query.get('secret_token')
if secret_token:
api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
return self.url_result(api_url)
class SoundcloudIE(InfoExtractor): class SoundcloudIE(InfoExtractor):
@ -83,7 +96,7 @@ class SoundcloudIE(InfoExtractor):
'repost_count': int, 'repost_count': int,
} }
}, },
# not streamable song # not streamable song, preview
{ {
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': { 'info_dict': {
@ -106,7 +119,6 @@ class SoundcloudIE(InfoExtractor):
# rtmp # rtmp
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Preview',
}, },
# private link # private link
{ {
@ -245,7 +257,6 @@ class SoundcloudIE(InfoExtractor):
_API_BASE = 'https://api.soundcloud.com/' _API_BASE = 'https://api.soundcloud.com/'
_API_V2_BASE = 'https://api-v2.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/' _BASE_URL = 'https://soundcloud.com/'
_CLIENT_ID = 'UW9ajvMgVdMMW3cdeBi8lPfN6dvOVGji'
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
_ARTWORK_MAP = { _ARTWORK_MAP = {
@ -261,9 +272,49 @@ class SoundcloudIE(InfoExtractor):
'original': 0, 'original': 0,
} }
def _store_client_id(self, client_id):
    """Write client_id to the downloader cache as ('soundcloud', 'client_id')."""
    cache = self._downloader.cache
    cache.store('soundcloud', 'client_id', client_id)
def _update_client_id(self):
    """Scrape a fresh client id from the scripts linked on soundcloud.com.

    Scripts referenced by the front page are tried from last to first.
    The first 32-character alphanumeric ``client_id`` found is set as
    ``self._CLIENT_ID`` and written to the cache.

    Raises ExtractorError when no script yields a client id.
    """
    homepage = self._download_webpage('https://soundcloud.com/', None)
    script_urls = re.findall(r'<script[^>]+src="([^"]+)"', homepage)
    for script_url in script_urls[::-1]:
        # A script that fails to download is skipped rather than fatal
        script = self._download_webpage(script_url, None, fatal=False)
        if not script:
            continue
        found_id = self._search_regex(
            r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
            script, 'client id', default=None)
        if not found_id:
            continue
        self._CLIENT_ID = found_id
        self._store_client_id(found_id)
        return
    raise ExtractorError('Unable to extract client id')
def _download_json(self, *args, **kwargs):
    """Download JSON with the current client id, refreshing it once on 401.

    Accepts the same arguments as the parent ``_download_json``.  The
    ``client_id`` query parameter is always set from ``self._CLIENT_ID``.
    If the first request fails with HTTP 401, the cached client id is
    cleared, a new one is extracted and the request is repeated once.

    Any other failure, or a 401 on the repeated request, is handled
    according to ``fatal``: with ``fatal=False`` a warning is reported and
    False is returned, otherwise the ExtractorError is re-raised.
    Previously a second 401 fell out of the loop and silently returned
    None regardless of ``fatal``.
    """
    non_fatal = kwargs.get('fatal') is False
    if non_fatal:
        # The parent call must raise so that the 401 can be inspected here
        del kwargs['fatal']
    # Copy so the caller's dict is not mutated; tolerate query=None
    query = (kwargs.get('query') or {}).copy()
    for attempt in range(2):
        query['client_id'] = self._CLIENT_ID
        kwargs['query'] = query
        try:
            return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
        except ExtractorError as e:
            unauthorized = (
                isinstance(e.cause, compat_HTTPError)
                and e.cause.code == 401)
            # Only the first attempt may trigger a client id refresh; a
            # repeated 401 is reported like any other error.
            if unauthorized and attempt == 0:
                self._store_client_id(None)
                self._update_client_id()
                continue
            elif non_fatal:
                self._downloader.report_warning(error_to_compat_str(e))
                return False
            raise
def _real_initialize(self):
    """Set self._CLIENT_ID from the cache, falling back to a built-in id."""
    cached_id = self._downloader.cache.load('soundcloud', 'client_id')
    self._CLIENT_ID = cached_id or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
@ -294,9 +345,9 @@ class SoundcloudIE(InfoExtractor):
}) })
def invalid_url(url): def invalid_url(url):
return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url) return not url or url in format_urls
def add_format(f, protocol): def add_format(f, protocol, is_preview=False):
mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url) mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
if mobj: if mobj:
for k, v in mobj.groupdict().items(): for k, v in mobj.groupdict().items():
@ -309,12 +360,16 @@ class SoundcloudIE(InfoExtractor):
v = f.get(k) v = f.get(k)
if v: if v:
format_id_list.append(v) format_id_list.append(v)
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
if preview:
format_id_list.append('preview')
abr = f.get('abr') abr = f.get('abr')
if abr: if abr:
f['abr'] = int(abr) f['abr'] = int(abr)
f.update({ f.update({
'format_id': '_'.join(format_id_list), 'format_id': '_'.join(format_id_list),
'protocol': 'm3u8_native' if protocol == 'hls' else 'http', 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
'preference': -10 if preview else None,
}) })
formats.append(f) formats.append(f)
@ -325,7 +380,7 @@ class SoundcloudIE(InfoExtractor):
if not isinstance(t, dict): if not isinstance(t, dict):
continue continue
format_url = url_or_none(t.get('url')) format_url = url_or_none(t.get('url'))
if not format_url or t.get('snipped') or '/preview/' in format_url: if not format_url:
continue continue
stream = self._download_json( stream = self._download_json(
format_url, track_id, query=query, fatal=False) format_url, track_id, query=query, fatal=False)
@ -348,7 +403,8 @@ class SoundcloudIE(InfoExtractor):
add_format({ add_format({
'url': stream_url, 'url': stream_url,
'ext': ext, 'ext': ext,
}, 'http' if protocol == 'progressive' else protocol) }, 'http' if protocol == 'progressive' else protocol,
t.get('snipped') or '/preview/' in format_url)
if not formats: if not formats:
# Old API, does not work for some tracks (e.g. # Old API, does not work for some tracks (e.g.
@ -441,9 +497,7 @@ class SoundcloudIE(InfoExtractor):
track_id = mobj.group('track_id') track_id = mobj.group('track_id')
query = { query = {}
'client_id': self._CLIENT_ID,
}
if track_id: if track_id:
info_json_url = self._API_V2_BASE + 'tracks/' + track_id info_json_url = self._API_V2_BASE + 'tracks/' + track_id
full_title = track_id full_title = track_id
@ -526,7 +580,6 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
COMMON_QUERY = { COMMON_QUERY = {
'limit': 2000000000, 'limit': 2000000000,
'client_id': self._CLIENT_ID,
'linked_partitioning': '1', 'linked_partitioning': '1',
} }
@ -712,9 +765,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
query = { query = {}
'client_id': self._CLIENT_ID,
}
token = mobj.group('token') token = mobj.group('token')
if token: if token:
query['secret_token'] = token query['secret_token'] = token
@ -751,7 +802,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
self._MAX_RESULTS_PER_PAGE) self._MAX_RESULTS_PER_PAGE)
query.update({ query.update({
'limit': limit, 'limit': limit,
'client_id': self._CLIENT_ID,
'linked_partitioning': 1, 'linked_partitioning': 1,
'offset': 0, 'offset': 0,
}) })

View file

@ -4,6 +4,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
merge_dicts, merge_dicts,
orderedSet, orderedSet,
@ -64,7 +65,7 @@ class SpankBangIE(InfoExtractor):
url.replace('/%s/embed' % video_id, '/%s/video' % video_id), url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
video_id, headers={'Cookie': 'country=US'}) video_id, headers={'Cookie': 'country=US'})
if re.search(r'<[^>]+\bid=["\']video_removed', webpage): if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
raise ExtractorError( raise ExtractorError(
'Video %s is not available' % video_id, expected=True) 'Video %s is not available' % video_id, expected=True)
@ -75,11 +76,20 @@ class SpankBangIE(InfoExtractor):
if not f_url: if not f_url:
return return
f = parse_resolution(format_id) f = parse_resolution(format_id)
f.update({ ext = determine_ext(f_url)
'url': f_url, if format_id.startswith('m3u8') or ext == 'm3u8':
'format_id': format_id, formats.extend(self._extract_m3u8_formats(
}) f_url, video_id, 'mp4', entry_protocol='m3u8_native',
formats.append(f) m3u8_id='hls', fatal=False))
elif format_id.startswith('mpd') or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
f_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'mp4' or f.get('width') or f.get('height'):
f.update({
'url': f_url,
'format_id': format_id,
})
formats.append(f)
STREAM_URL_PREFIX = 'stream_url_' STREAM_URL_PREFIX = 'stream_url_'
@ -93,28 +103,22 @@ class SpankBangIE(InfoExtractor):
r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, 'stream key', group='value') webpage, 'stream key', group='value')
sb_csrf_session = self._get_cookies(
'https://spankbang.com')['sb_csrf_session'].value
stream = self._download_json( stream = self._download_json(
'https://spankbang.com/api/videos/stream', video_id, 'https://spankbang.com/api/videos/stream', video_id,
'Downloading stream JSON', data=urlencode_postdata({ 'Downloading stream JSON', data=urlencode_postdata({
'id': stream_key, 'id': stream_key,
'data': 0, 'data': 0,
'sb_csrf_session': sb_csrf_session,
}), headers={ }), headers={
'Referer': url, 'Referer': url,
'X-CSRFToken': sb_csrf_session, 'X-Requested-With': 'XMLHttpRequest',
}) })
for format_id, format_url in stream.items(): for format_id, format_url in stream.items():
if format_id.startswith(STREAM_URL_PREFIX): if format_url and isinstance(format_url, list):
if format_url and isinstance(format_url, list): format_url = format_url[0]
format_url = format_url[0] extract_format(format_id, format_url)
extract_format(
format_id[len(STREAM_URL_PREFIX):], format_url)
self._sort_formats(formats) self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
info = self._search_json_ld(webpage, video_id, default={}) info = self._search_json_ld(webpage, video_id, default={})

View file

@ -165,8 +165,12 @@ class TeachableIE(TeachableBaseIE):
if any(re.search(p, webpage) for p in ( if any(re.search(p, webpage) for p in (
r'class=["\']lecture-contents-locked', r'class=["\']lecture-contents-locked',
r'>\s*Lecture contents locked', r'>\s*Lecture contents locked',
r'id=["\']lecture-locked')): r'id=["\']lecture-locked',
# https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
r'class=["\'](?:inner-)?lesson-locked',
r'>LESSON LOCKED<')):
self.raise_login_required('Lecture contents locked') self.raise_login_required('Lecture contents locked')
raise ExtractorError('Unable to find video URL')
title = self._og_search_title(webpage, default=None) title = self._og_search_title(webpage, default=None)

View file

@ -1,21 +1,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
dict_get,
float_or_none,
int_or_none,
unified_timestamp,
update_url_query,
url_or_none,
)
class TruNewsIE(InfoExtractor): class TruNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
'md5': 'a19c024c3906ff954fac9b96ce66bb08',
'info_dict': { 'info_dict': {
'id': '5c5a21e65d3c196e1c0020cc', 'id': '5c5a21e65d3c196e1c0020cc',
'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
@ -28,48 +19,16 @@ class TruNewsIE(InfoExtractor):
}, },
'add_ie': ['Zype'], 'add_ie': ['Zype'],
} }
_ZYPE_TEMPL = 'https://player.zype.com/embed/%s.js?api_key=X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
video = self._download_json( zype_id = self._download_json(
'https://api.zype.com/videos', display_id, query={ 'https://api.zype.com/videos', display_id, query={
'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H', 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
'per_page': 1, 'per_page': 1,
'active': 'true', 'active': 'true',
'friendly_title': display_id, 'friendly_title': display_id,
})['response'][0] })['response'][0]['_id']
return self.url_result(self._ZYPE_TEMPL % zype_id, 'Zype', zype_id)
zype_id = video['_id']
thumbnails = []
thumbnails_list = video.get('thumbnails')
if isinstance(thumbnails_list, list):
for thumbnail in thumbnails_list:
if not isinstance(thumbnail, dict):
continue
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
return {
'_type': 'url_transparent',
'url': update_url_query(
'https://player.zype.com/embed/%s.js' % zype_id,
{'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}),
'ie_key': 'Zype',
'id': zype_id,
'display_id': display_id,
'title': video.get('title'),
'description': dict_get(video, ('description', 'ott_description', 'short_description')),
'duration': int_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('published_at')),
'average_rating': float_or_none(video.get('rating')),
'view_count': int_or_none(video.get('request_count')),
'thumbnails': thumbnails,
}

View file

@ -1,10 +1,16 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import extract_attributes from ..utils import (
determine_ext,
extract_attributes,
js_to_json,
url_or_none,
)
class TV2DKIE(InfoExtractor): class TV2DKIE(InfoExtractor):
@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor):
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
video_id=kaltura_id)) video_id=kaltura_id))
return self.playlist_result(entries) return self.playlist_result(entries)
class TV2DKBornholmPlayIE(InfoExtractor):
    _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
        'info_dict': {
            'id': '781021',
            'ext': 'mp4',
            'title': '12Nyheder-27.11.19',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract title and formats for a play.tv2bornholm.dk video.

        Posts the video id to the site's ``specifikVideo`` AJAX endpoint
        and searches the text in the ``d`` field of the JSON response for
        the title and the list of sources.
        """
        video_id = self._match_id(url)

        request_body = json.dumps({
            'playlist_id': video_id,
            'serienavn': '',
        }).encode()
        request_headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Content-Type': 'application/json; charset=UTF-8',
        }
        player_code = self._download_json(
            'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo',
            video_id, data=request_body, headers=request_headers)['d']

        # TODO: generalize flowplayer
        title = self._search_regex(
            r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', player_code,
            'title', group='value')
        sources_code = self._search_regex(
            r'(?s)sources:\s*(\[.+?\]),', player_code, 'sources')
        sources = self._parse_json(sources_code, video_id, js_to_json)

        formats = []
        seen_urls = set()
        for source in sources:
            src = url_or_none(source.get('src'))
            # Skip entries without a usable URL and repeated URLs
            if not src or src in seen_urls:
                continue
            seen_urls.add(src)
            ext = determine_ext(src)
            mime_type = source.get('type')
            if mime_type == 'application/x-mpegurl' or ext == 'm3u8':
                hls_formats = self._extract_m3u8_formats(
                    src, video_id, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(hls_formats)
            elif mime_type == 'application/dash+xml' or ext == 'mpd':
                dash_formats = self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False)
                formats.extend(dash_formats)
            else:
                formats.append({'url': src})
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }

View file

@ -17,12 +17,10 @@ from ..compat import (
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
qualities,
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -327,6 +325,7 @@ class TwitchVodIE(TwitchItemBaseIE):
'allow_audio_only': 'true', 'allow_audio_only': 'true',
'allow_spectre': 'true', 'allow_spectre': 'true',
'player': 'twitchweb', 'player': 'twitchweb',
'playlist_include_framerate': 'true',
'nauth': access_token['token'], 'nauth': access_token['token'],
'nauthsig': access_token['sig'], 'nauthsig': access_token['sig'],
})), })),
@ -598,6 +597,7 @@ class TwitchStreamIE(TwitchBaseIE):
'allow_spectre': 'true', 'allow_spectre': 'true',
'p': random.randint(1000000, 10000000), 'p': random.randint(1000000, 10000000),
'player': 'twitchweb', 'player': 'twitchweb',
'playlist_include_framerate': 'true',
'segment_preference': '4', 'segment_preference': '4',
'sig': access_token['sig'].encode('utf-8'), 'sig': access_token['sig'].encode('utf-8'),
'token': access_token['token'].encode('utf-8'), 'token': access_token['token'].encode('utf-8'),
@ -674,63 +674,81 @@ class TwitchClipsIE(TwitchBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
status = self._download_json( clip = self._download_json(
'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id, 'https://gql.twitch.tv/gql', video_id, data=json.dumps({
video_id) 'query': '''{
clip(slug: "%s") {
broadcaster {
displayName
}
createdAt
curator {
displayName
id
}
durationSeconds
id
tiny: thumbnailURL(width: 86, height: 45)
small: thumbnailURL(width: 260, height: 147)
medium: thumbnailURL(width: 480, height: 272)
title
videoQualities {
frameRate
quality
sourceURL
}
viewCount
}
}''' % video_id,
}).encode(), headers={
'Client-ID': self._CLIENT_ID,
})['data']['clip']
if not clip:
raise ExtractorError(
'This clip is no longer available', expected=True)
formats = [] formats = []
for option in clip.get('videoQualities', []):
for option in status['quality_options']:
if not isinstance(option, dict): if not isinstance(option, dict):
continue continue
source = url_or_none(option.get('source')) source = url_or_none(option.get('sourceURL'))
if not source: if not source:
continue continue
formats.append({ formats.append({
'url': source, 'url': source,
'format_id': option.get('quality'), 'format_id': option.get('quality'),
'height': int_or_none(option.get('quality')), 'height': int_or_none(option.get('quality')),
'fps': int_or_none(option.get('frame_rate')), 'fps': int_or_none(option.get('frameRate')),
}) })
self._sort_formats(formats) self._sort_formats(formats)
info = { thumbnails = []
for thumbnail_id in ('tiny', 'small', 'medium'):
thumbnail_url = clip.get(thumbnail_id)
if not thumbnail_url:
continue
thumb = {
'id': thumbnail_id,
'url': thumbnail_url,
}
mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
if mobj:
thumb.update({
'height': int(mobj.group(2)),
'width': int(mobj.group(1)),
})
thumbnails.append(thumb)
return {
'id': clip.get('id') or video_id,
'title': clip.get('title') or video_id,
'formats': formats, 'formats': formats,
'duration': int_or_none(clip.get('durationSeconds')),
'views': int_or_none(clip.get('viewCount')),
'timestamp': unified_timestamp(clip.get('createdAt')),
'thumbnails': thumbnails,
'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
} }
clip = self._call_api(
'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
'Accept': 'application/vnd.twitchtv.v5+json',
})
if clip:
quality_key = qualities(('tiny', 'small', 'medium'))
thumbnails = []
thumbnails_dict = clip.get('thumbnails')
if isinstance(thumbnails_dict, dict):
for thumbnail_id, thumbnail_url in thumbnails_dict.items():
thumbnails.append({
'id': thumbnail_id,
'url': thumbnail_url,
'preference': quality_key(thumbnail_id),
})
info.update({
'id': clip.get('tracking_id') or video_id,
'title': clip.get('title') or video_id,
'duration': float_or_none(clip.get('duration')),
'views': int_or_none(clip.get('views')),
'timestamp': unified_timestamp(clip.get('created_at')),
'thumbnails': thumbnails,
'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
})
else:
info.update({
'title': video_id,
'id': video_id,
})
return info

View file

@ -251,10 +251,10 @@ class TwitterIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '700207533655363584', 'id': '700207533655363584',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Simon Vertugo - BEAT PROD: @suhmeduh #Damndaniel', 'title': 'simon vetugo - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'Simon Vertugo', 'uploader': 'simon vetugo',
'uploader_id': 'simonvertugo', 'uploader_id': 'simonvertugo',
'duration': 30.0, 'duration': 30.0,
'timestamp': 1455777459, 'timestamp': 1455777459,
@ -376,6 +376,10 @@ class TwitterIE(TwitterBaseIE):
# Twitch Clip Embed # Twitch Clip Embed
'url': 'https://twitter.com/GunB1g/status/1163218564784017422', 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
'only_matching': True, 'only_matching': True,
}, {
# promo_video_website card
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -458,10 +462,11 @@ class TwitterIE(TwitterBaseIE):
return try_get(o, lambda x: x[x['type'].lower() + '_value']) return try_get(o, lambda x: x[x['type'].lower() + '_value'])
card_name = card['name'].split(':')[-1] card_name = card['name'].split(':')[-1]
if card_name == 'amplify': if card_name in ('amplify', 'promo_video_website'):
formats = self._extract_formats_from_vmap_url( is_amplify = card_name == 'amplify'
get_binding_value('amplify_url_vmap'), vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
get_binding_value('amplify_content_id') or twid) content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [] thumbnails = []

View file

@ -1,73 +1,16 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .imggaming import ImgGamingBaseIE
from ..utils import (
ExtractorError,
parse_duration,
parse_iso8601,
urlencode_postdata,
)
class UFCTVIE(InfoExtractor): class UFCTVIE(ImgGamingBaseIE):
_VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)' _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
_NETRC_MACHINE = 'ufctv' _NETRC_MACHINE = 'ufctv'
_TEST = { _REALM = 'ufc'
'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
'info_dict': {
'id': '34167',
'ext': 'mp4',
'title': 'UFC 219 Countdown: Full Episode',
'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
'timestamp': 1513962360,
'upload_date': '20171222',
},
'params': {
# m3u8 download
'skip_download': True,
}
}
def _real_initialize(self):
username, password = self._get_login_info()
if username is None:
return
code = self._download_json( class UFCArabiaIE(ImgGamingBaseIE):
'https://www.ufc.tv/secure/authenticate', _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
None, 'Logging in', data=urlencode_postdata({ _NETRC_MACHINE = 'ufcarabia'
'username': username, _REALM = 'admufc'
'password': password,
'format': 'json',
})).get('code')
if code and code != 'loginsuccess':
raise ExtractorError(code, expected=True)
def _real_extract(self, url):
    """Extract a single ufc.tv video.

    Downloads the page's JSON representation for the metadata, then asks
    the publish point service for the HLS playlist and expands it into
    formats.
    """
    slug = self._match_id(url)
    # The page URL itself is requested with ?format=json to get the metadata.
    metadata = self._download_json(url, slug, query={
        'format': 'json',
    })

    video_id = str(metadata['id'])
    # Looked up before the second request so a missing name fails early.
    title = metadata['name']

    # The publish point is queried with an iPhone user agent string.
    publish_point = self._download_json(
        'https://www.ufc.tv/service/publishpoint', video_id,
        query={
            'type': 'video',
            'format': 'json',
            'id': video_id,
        },
        headers={
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
        })
    # Strip the "_iphone" marker from the returned playlist path.
    playlist_url = publish_point['path'].replace('_iphone.', '.')

    formats = self._extract_m3u8_formats(playlist_url, video_id, 'mp4')
    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': metadata.get('description'),
        'duration': parse_duration(metadata.get('runtime')),
        'timestamp': parse_iso8601(metadata.get('releaseDate')),
        'formats': formats,
    }

View file

@ -1,35 +1,50 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import functools
import time
import hashlib import hashlib
import json import json
import random import random
import re
import time
from .adobepass import AdobePassIE from .adobepass import AdobePassIE
from .youtube import YoutubeIE
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_str, compat_str,
) )
from ..utils import ( from ..utils import (
clean_html,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
OnDemandPagedList,
parse_age_limit, parse_age_limit,
str_or_none, str_or_none,
try_get, try_get,
) )
class ViceIE(AdobePassIE): class ViceBaseIE(InfoExtractor):
def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
    """Query the Vice GraphQL endpoint and return ``data[resource]``.

    The query selects ``resource`` filtered by ``locale`` and by
    ``resource_key: resource_id``; ``args`` is appended verbatim to the
    argument list and ``fields`` is the selection set. ``resource_id`` is
    also used as the id passed to the download helper.
    """
    graphql_query = '''{
%s(locale: "%s", %s: "%s"%s) {
%s
}
}''' % (resource, locale, resource_key, resource_id, args, fields)
    response = self._download_json(
        'https://video.vice.com/api/v1/graphql', resource_id,
        query={
            'query': graphql_query,
        })
    return response['data'][resource]
class ViceIE(ViceBaseIE, AdobePassIE):
IE_NAME = 'vice' IE_NAME = 'vice'
_VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)' _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
_TESTS = [{ _TESTS = [{
'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7', 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
'info_dict': { 'info_dict': {
'id': '5e647f0125e145c9aef2069412c0cbde', 'id': '58c69e38a55424f1227dc3f7',
'ext': 'mp4', 'ext': 'mp4',
'title': '10 Questions You Always Wanted To Ask: Pet Cremator', 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5', 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
@ -43,17 +58,16 @@ class ViceIE(AdobePassIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['UplynkPreplay'],
}, { }, {
# geo restricted to US # geo restricted to US
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
'info_dict': { 'info_dict': {
'id': '930c0ad1f47141cc955087eecaddb0e2', 'id': '5816510690b70e6c5fd39a56',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'waypoint', 'uploader': 'vice',
'title': 'The Signal From Tölva', 'title': 'The Signal From Tölva',
'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
'uploader_id': '57f7d621e05ca860fa9ccaf9', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1477941983, 'timestamp': 1477941983,
'upload_date': '20161031', 'upload_date': '20161031',
}, },
@ -61,15 +75,14 @@ class ViceIE(AdobePassIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['UplynkPreplay'],
}, { }, {
'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
'info_dict': { 'info_dict': {
'id': '581b12b60a0e1f4c0fb6ea2f', 'id': '581b12b60a0e1f4c0fb6ea2f',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>', 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
'uploader': 'VICE', 'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1485368119, 'timestamp': 1485368119,
'upload_date': '20170125', 'upload_date': '20170125',
@ -78,9 +91,7 @@ class ViceIE(AdobePassIE):
'params': { 'params': {
# AES-encrypted m3u8 # AES-encrypted m3u8
'skip_download': True, 'skip_download': True,
'proxy': '127.0.0.1:8118',
}, },
'add_ie': ['UplynkPreplay'],
}, { }, {
'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
'only_matching': True, 'only_matching': True,
@ -98,7 +109,7 @@ class ViceIE(AdobePassIE):
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
return re.findall( return re.findall(
r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)', r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
webpage) webpage)
@staticmethod @staticmethod
@ -109,31 +120,16 @@ class ViceIE(AdobePassIE):
def _real_extract(self, url): def _real_extract(self, url):
locale, video_id = re.match(self._VALID_URL, url).groups() locale, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage( video = self._call_api('videos', 'id', video_id, locale, '''body
'https://video.vice.com/%s/embed/%s' % (locale, video_id), locked
video_id) rating
thumbnail_url
video = self._parse_json( title''')[0]
self._search_regex( title = video['title'].strip()
r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
'app state'), video_id)['video']
video_id = video.get('vms_id') or video.get('id') or video_id
title = video['title']
is_locked = video.get('locked')
rating = video.get('rating') rating = video.get('rating')
thumbnail = video.get('thumbnail_url')
duration = int_or_none(video.get('duration'))
series = try_get(
video, lambda x: x['episode']['season']['show']['title'],
compat_str)
episode_number = try_get(
video, lambda x: x['episode']['episode_number'])
season_number = try_get(
video, lambda x: x['episode']['season']['season_number'])
uploader = None
query = {} query = {}
if is_locked: if video.get('locked'):
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
'VICELAND', title, video_id, rating) 'VICELAND', title, video_id, rating)
query['tvetoken'] = self._extract_mvpd_auth( query['tvetoken'] = self._extract_mvpd_auth(
@ -148,12 +144,9 @@ class ViceIE(AdobePassIE):
query.update({ query.update({
'exp': exp, 'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
'_ad_blocked': None, 'skipadstitching': 1,
'_ad_unit': '',
'_debug': '',
'platform': 'desktop', 'platform': 'desktop',
'rn': random.randint(10000, 100000), 'rn': random.randint(10000, 100000),
'fbprebidtoken': '',
}) })
try: try:
@ -169,85 +162,94 @@ class ViceIE(AdobePassIE):
raise raise
video_data = preplay['video'] video_data = preplay['video']
base = video_data['base'] formats = self._extract_m3u8_formats(
uplynk_preplay_url = preplay['preplayURL'] preplay['playURL'], video_id, 'mp4', 'm3u8_native')
episode = video_data.get('episode', {}) self._sort_formats(formats)
channel = video_data.get('channel', {}) episode = video_data.get('episode') or {}
channel = video_data.get('channel') or {}
season = video_data.get('season') or {}
subtitles = {} subtitles = {}
cc_url = preplay.get('ccURL') for subtitle in preplay.get('subtitleURLs', []):
if cc_url: cc_url = subtitle.get('url')
subtitles['en'] = [{ if not cc_url:
continue
language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
subtitles.setdefault(language_code, []).append({
'url': cc_url, 'url': cc_url,
}] })
return { return {
'_type': 'url_transparent', 'formats': formats,
'url': uplynk_preplay_url,
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': base.get('body') or base.get('display_body'), 'description': clean_html(video.get('body')),
'thumbnail': thumbnail, 'thumbnail': video.get('thumbnail_url'),
'duration': int_or_none(video_data.get('video_duration')) or duration, 'duration': int_or_none(video_data.get('video_duration')),
'timestamp': int_or_none(video_data.get('created_at'), 1000), 'timestamp': int_or_none(video_data.get('created_at'), 1000),
'age_limit': parse_age_limit(video_data.get('video_rating')), 'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
'series': video_data.get('show_title') or series, 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
'episode_number': int_or_none(episode.get('episode_number') or episode_number), 'episode_number': int_or_none(episode.get('episode_number')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
'season_number': int_or_none(season_number), 'season_number': int_or_none(season.get('season_number')),
'season_id': str_or_none(episode.get('season_id')), 'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader, 'uploader': channel.get('name'),
'uploader_id': str_or_none(channel.get('id')), 'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles, 'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
} }
class ViceShowIE(InfoExtractor): class ViceShowIE(ViceBaseIE):
IE_NAME = 'vice:show' IE_NAME = 'vice:show'
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
_PAGE_SIZE = 25
_TEST = { _TESTS = [{
'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2', 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
'info_dict': { 'info_dict': {
'id': 'fuck-thats-delicious-2', 'id': '57a2040c8cb727dec794c901',
'title': "Fuck, That's Delicious", 'title': 'F*ck, Thats Delicious',
'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.', 'description': 'The life and eating habits of raps greatest bon vivant, Action Bronson.',
}, },
'playlist_count': 17, 'playlist_mincount': 64,
} }, {
'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
'only_matching': True,
}]
def _fetch_page(self, locale, show_id, page):
    """Yield a url result for every video on one page of a show.

    The API is asked for page number ``page + 1`` with ``_PAGE_SIZE``
    entries per page (presumably because the caller counts pages from
    zero -- confirm against the paged-list helper).
    """
    paging_args = ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE)
    page_videos = self._call_api(
        'videos', 'show_id', show_id, locale, '''body
id
url''', paging_args)
    for item in page_videos:
        # 'url' is required; 'id' is optional and passed along when present.
        yield self.url_result(
            item['url'], ViceIE.ie_key(), item.get('id'))
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) locale, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, show_id) show = self._call_api('shows', 'slug', display_id, locale, '''dek
id
title''')[0]
show_id = show['id']
entries = [ entries = OnDemandPagedList(
self.url_result(video_url, ViceIE.ie_key()) functools.partial(self._fetch_page, locale, show_id),
for video_url, _ in re.findall( self._PAGE_SIZE)
r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
% ViceIE._VALID_URL, webpage)]
title = self._search_regex( return self.playlist_result(
r'<title>(.+?)</title>', webpage, 'title', default=None) entries, show_id, show.get('title'), show.get('dek'))
if title:
title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
description = self._html_search_meta(
'description', webpage, 'description')
return self.playlist_result(entries, show_id, title, description)
class ViceArticleIE(InfoExtractor): class ViceArticleIE(ViceBaseIE):
IE_NAME = 'vice:article' IE_NAME = 'vice:article'
_VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)' _VALID_URL = r'https://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
'info_dict': { 'info_dict': {
'id': '41eae2a47b174a1398357cec55f1f6fc', 'id': '58dc0a3dee202d2a0ccfcbd8',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mormon War on Porn ', 'title': 'Mormon War on Porn',
'description': 'md5:6394a8398506581d0346b9ab89093fef', 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
'uploader': 'vice', 'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1491883129, 'timestamp': 1491883129,
@ -258,10 +260,10 @@ class ViceArticleIE(InfoExtractor):
# AES-encrypted m3u8 # AES-encrypted m3u8
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['UplynkPreplay'], 'add_ie': [ViceIE.ie_key()],
}, { }, {
'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
'md5': '7fe8ebc4fa3323efafc127b82bd821d9', 'md5': '13010ee0bc694ea87ec40724397c2349',
'info_dict': { 'info_dict': {
'id': '3jstaBeXgAs', 'id': '3jstaBeXgAs',
'ext': 'mp4', 'ext': 'mp4',
@ -271,15 +273,15 @@ class ViceArticleIE(InfoExtractor):
'uploader_id': 'MotherboardTV', 'uploader_id': 'MotherboardTV',
'upload_date': '20140529', 'upload_date': '20140529',
}, },
'add_ie': ['Youtube'], 'add_ie': [YoutubeIE.ie_key()],
}, { }, {
'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded', 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': { 'info_dict': {
'id': 'e2ed435eb67e43efb66e6ef9a6930a88', 'id': '57f41d3556a0a80f54726060',
'ext': 'mp4', 'ext': 'mp4',
'title': "Making The World's First Male Sex Doll", 'title': "Making The World's First Male Sex Doll",
'description': 'md5:916078ef0e032d76343116208b6cc2c4', 'description': 'md5:19b00b215b99961cf869c40fbe9df755',
'uploader': 'vice', 'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1476919911, 'timestamp': 1476919911,
@ -288,6 +290,7 @@ class ViceArticleIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
'format': 'bestvideo',
}, },
'add_ie': [ViceIE.ie_key()], 'add_ie': [ViceIE.ie_key()],
}, { }, {
@ -299,14 +302,11 @@ class ViceArticleIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) locale, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) article = self._call_api('articles', 'slug', display_id, locale, '''body
embed_code''')[0]
prefetch_data = self._parse_json(self._search_regex( body = article['body']
r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
webpage, 'app state'), display_id)['pageData']
body = prefetch_data['body']
def _url_res(video_url, ie_key): def _url_res(video_url, ie_key):
return { return {
@ -316,7 +316,7 @@ class ViceArticleIE(InfoExtractor):
'ie_key': ie_key, 'ie_key': ie_key,
} }
vice_url = ViceIE._extract_url(webpage) vice_url = ViceIE._extract_url(body)
if vice_url: if vice_url:
return _url_res(vice_url, ViceIE.ie_key()) return _url_res(vice_url, ViceIE.ie_key())
@ -332,6 +332,6 @@ class ViceArticleIE(InfoExtractor):
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'data-video-url="([^"]+)"', r'data-video-url="([^"]+)"',
prefetch_data['embed_code'], 'video URL') article['embed_code'], 'video URL')
return _url_res(video_url, ViceIE.ie_key()) return _url_res(video_url, ViceIE.ie_key())

View file

@ -1,46 +0,0 @@
from __future__ import unicode_literals
import re
import random
from .common import InfoExtractor
class VideoPremiumIE(InfoExtractor):
    """Extractor for videopremium.tv / videopremium.me pages (RTMP stream)."""

    _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
    _TEST = {
        'url': 'http://videopremium.tv/4w7oadjsf156',
        'info_dict': {
            'id': '4w7oadjsf156',
            'ext': 'f4v',
            'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4'
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Test file has been deleted.',
    }

    def _real_extract(self, url):
        """Return the info dict for the video identified by ``url``.

        The title is scraped from the page; the media URL is built from
        the video id and a randomly picked server number between 1 and 16.
        """
        video_id = self._match_id(url)
        page_url = 'http://videopremium.tv/' + video_id

        webpage = self._download_webpage(page_url, video_id)
        redirect_stub = re.match(
            r'^<html><head><script[^>]*>window\.location\s*=', webpage)
        if redirect_stub:
            # Download again, we need a cookie
            webpage = self._download_webpage(
                page_url, video_id,
                note='Downloading webpage again (with cookie)')

        title = self._html_search_regex(
            r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, 'video title')

        server_number = random.randint(1, 16)
        return {
            'id': video_id,
            'url': 'rtmp://e%d.md.iplay.md/play' % server_number,
            'play_path': 'mp4:%s.f4v' % video_id,
            'page_url': page_url,
            'player_url': 'http://videopremium.tv/uplayer/uppod.swf',
            'ext': 'f4v',
            'title': title,
        }

View file

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import collections import collections
import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -11,6 +12,7 @@ from ..utils import (
ExtractorError, ExtractorError,
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
OnDemandPagedList,
orderedSet, orderedSet,
str_or_none, str_or_none,
str_to_int, str_to_int,
@ -477,14 +479,23 @@ class VKIE(VKBaseIE):
class VKUserVideosIE(VKBaseIE): class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos' IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos" IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
_TEMPLATE_URL = 'https://vk.com/videos' _TEMPLATE_URL = 'https://vk.com/videos'
_TESTS = [{ _TESTS = [{
'url': 'http://vk.com/videos205387401', 'url': 'https://vk.com/videos-767561',
'info_dict': { 'info_dict': {
'id': '205387401', 'id': '-767561_all',
}, },
'playlist_mincount': 4, 'playlist_mincount': 1150,
}, {
'url': 'https://vk.com/videos-767561?section=uploaded',
'info_dict': {
'id': '-767561_uploaded',
},
'playlist_mincount': 425,
}, {
'url': 'http://vk.com/videos205387401',
'only_matching': True,
}, { }, {
'url': 'http://vk.com/videos-77521', 'url': 'http://vk.com/videos-77521',
'only_matching': True, 'only_matching': True,
@ -498,25 +509,33 @@ class VKUserVideosIE(VKBaseIE):
'url': 'http://new.vk.com/videos205387401', 'url': 'http://new.vk.com/videos205387401',
'only_matching': True, 'only_matching': True,
}] }]
_VIDEO = collections.namedtuple( _PAGE_SIZE = 1000
'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta']) _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
def _real_extract(self, url):
page_id = self._match_id(url)
def _fetch_page(self, page_id, section, page):
l = self._download_payload('al_video', page_id, { l = self._download_payload('al_video', page_id, {
'act': 'load_videos_silent', 'act': 'load_videos_silent',
'offset': page * self._PAGE_SIZE,
'oid': page_id, 'oid': page_id,
})[0]['']['list'] 'section': section,
})[0][section]['list']
entries = []
for video in l: for video in l:
v = self._VIDEO._make(video) v = self._VIDEO._make(video[:2])
video_id = '%d_%d' % (v.owner_id, v.id) video_id = '%d_%d' % (v.owner_id, v.id)
entries.append(self.url_result( yield self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)) 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
return self.playlist_result(entries, page_id) def _real_extract(self, url):
page_id, section = re.match(self._VALID_URL, url).groups()
if not section:
section = 'all'
entries = OnDemandPagedList(
functools.partial(self._fetch_page, page_id, section),
self._PAGE_SIZE)
return self.playlist_result(entries, '%s_%s' % (page_id, section))
class VKWallPostIE(VKBaseIE): class VKWallPostIE(VKBaseIE):
@ -580,8 +599,7 @@ class VKWallPostIE(VKBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
_AUDIO = collections.namedtuple( _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key'])
def _decode(self, enc): def _decode(self, enc):
dec = '' dec = ''
@ -629,7 +647,7 @@ class VKWallPostIE(VKBaseIE):
for audio in re.findall(r'data-audio="([^"]+)', webpage): for audio in re.findall(r'data-audio="([^"]+)', webpage):
audio = self._parse_json(unescapeHTML(audio), post_id) audio = self._parse_json(unescapeHTML(audio), post_id)
a = self._AUDIO._make(audio) a = self._AUDIO._make(audio[:16])
if not a.url: if not a.url:
continue continue
title = unescapeHTML(a.title) title = unescapeHTML(a.title)

View file

@ -6,22 +6,18 @@ import time
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from .naver import NaverBaseIE
compat_urllib_parse_urlencode, from ..compat import compat_str
compat_str,
)
from ..utils import ( from ..utils import (
dict_get,
ExtractorError, ExtractorError,
float_or_none, merge_dicts,
int_or_none,
remove_start, remove_start,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
) )
class VLiveIE(InfoExtractor): class VLiveIE(NaverBaseIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_NETRC_MACHINE = 'vlive' _NETRC_MACHINE = 'vlive'
@ -34,6 +30,7 @@ class VLiveIE(InfoExtractor):
'title': "[V LIVE] Girl's Day's Broadcast", 'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day", 'creator': "Girl's Day",
'view_count': int, 'view_count': int,
'uploader_id': 'muploader_a',
}, },
}, { }, {
'url': 'http://www.vlive.tv/video/16937', 'url': 'http://www.vlive.tv/video/16937',
@ -44,6 +41,7 @@ class VLiveIE(InfoExtractor):
'creator': 'EXO', 'creator': 'EXO',
'view_count': int, 'view_count': int,
'subtitles': 'mincount:12', 'subtitles': 'mincount:12',
'uploader_id': 'muploader_j',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -187,45 +185,9 @@ class VLiveIE(InfoExtractor):
'This video is only available for CH+ subscribers') 'This video is only available for CH+ subscribers')
long_video_id, key = video_info['vid'], video_info['inkey'] long_video_id, key = video_info['vid'], video_info['inkey']
playinfo = self._download_json( return merge_dicts(
'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s' self._get_common_fields(webpage),
% compat_urllib_parse_urlencode({ self._extract_video_info(video_id, long_video_id, key))
'videoId': long_video_id,
'key': key,
'ptc': 'http',
'doct': 'json', # document type (xml or json)
'cpt': 'vtt', # captions type (vtt or ttml)
}), video_id)
formats = [{
'url': vid['source'],
'format_id': vid.get('encodingOption', {}).get('name'),
'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
'width': int_or_none(vid.get('encodingOption', {}).get('width')),
'height': int_or_none(vid.get('encodingOption', {}).get('height')),
'filesize': int_or_none(vid.get('size')),
} for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
self._sort_formats(formats)
view_count = int_or_none(playinfo.get('meta', {}).get('count'))
subtitles = {}
for caption in playinfo.get('captions', {}).get('list', []):
lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
if lang and caption.get('source'):
subtitles[lang] = [{
'ext': 'vtt',
'url': caption['source']}]
info = self._get_common_fields(webpage)
info.update({
'id': video_id,
'formats': formats,
'view_count': view_count,
'subtitles': subtitles,
})
return info
def _download_init_page(self, video_id): def _download_init_page(self, video_id):
return self._download_webpage( return self._download_webpage(

View file

@ -6,8 +6,8 @@ from ..utils import unescapeHTML
class VODPlatformIE(InfoExtractor): class VODPlatformIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
_TEST = { _TESTS = [{
# from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
'md5': '1db2b7249ce383d6be96499006e951fc', 'md5': '1db2b7249ce383d6be96499006e951fc',
@ -16,7 +16,10 @@ class VODPlatformIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"', 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
} }
} }, {
'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View file

@ -13,8 +13,7 @@ from ..utils import (
class WistiaIE(InfoExtractor): class WistiaIE(InfoExtractor):
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})' _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
_TESTS = [{ _TESTS = [{
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
@ -67,10 +66,10 @@ class WistiaIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
data_json = self._download_json( data_json = self._download_json(
self._API_URL % video_id, video_id, self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
# Some videos require this. # Some videos require this.
headers={ headers={
'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, 'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
}) })
if data_json.get('error'): if data_json.get('error'):
@ -95,27 +94,61 @@ class WistiaIE(InfoExtractor):
'url': aurl, 'url': aurl,
'width': int_or_none(a.get('width')), 'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')), 'height': int_or_none(a.get('height')),
'filesize': int_or_none(a.get('size')),
}) })
else: else:
aext = a.get('ext') aext = a.get('ext')
is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' display_name = a.get('display_name')
formats.append({ format_id = atype
'format_id': atype, if atype and atype.endswith('_video') and display_name:
format_id = '%s-%s' % (atype[:-6], display_name)
f = {
'format_id': format_id,
'url': aurl, 'url': aurl,
'tbr': int_or_none(a.get('bitrate')), 'tbr': int_or_none(a.get('bitrate')) or None,
'vbr': int_or_none(a.get('opt_vbitrate')),
'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')),
'filesize': int_or_none(a.get('size')),
'vcodec': a.get('codec'),
'container': a.get('container'),
'ext': 'mp4' if is_m3u8 else aext,
'protocol': 'm3u8' if is_m3u8 else None,
'preference': 1 if atype == 'original' else None, 'preference': 1 if atype == 'original' else None,
}) }
if display_name == 'Audio':
f.update({
'vcodec': 'none',
})
else:
f.update({
'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')),
'vcodec': a.get('codec'),
})
if a.get('container') == 'm3u8' or aext == 'm3u8':
ts_f = f.copy()
ts_f.update({
'ext': 'ts',
'format_id': f['format_id'].replace('hls-', 'ts-'),
'url': f['url'].replace('.bin', '.ts'),
})
formats.append(ts_f)
f.update({
'ext': 'mp4',
'protocol': 'm3u8_native',
})
else:
f.update({
'container': a.get('container'),
'ext': aext,
'filesize': int_or_none(a.get('size')),
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
for caption in data.get('captions', []):
language = caption.get('language')
if not language:
continue
subtitles[language] = [{
'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
}]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -124,4 +157,5 @@ class WistiaIE(InfoExtractor):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(data.get('duration')), 'duration': float_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('createdAt')), 'timestamp': int_or_none(data.get('createdAt')),
'subtitles': subtitles,
} }

View file

@ -1,6 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
urljoin, urljoin,
@ -8,9 +9,9 @@ from ..utils import (
class YourPornIE(InfoExtractor): class YourPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:yourporn\.sexy|sxyprn\.com)/post/(?P<id>[^/?#&.]+)' _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html', 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
'md5': '6f8682b6464033d87acaa7a8ff0c092e', 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
'info_dict': { 'info_dict': {
'id': '57ffcb2e1179b', 'id': '57ffcb2e1179b',
@ -33,11 +34,19 @@ class YourPornIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = urljoin(url, self._parse_json( parts = self._parse_json(
self._search_regex( self._search_regex(
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info', r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
group='data'), group='data'),
video_id)[video_id]).replace('/cdn/', '/cdn5/') video_id)[video_id].split('/')
num = 0
for c in parts[6] + parts[7]:
if c.isnumeric():
num += int(c)
parts[5] = compat_str(int(parts[5]) - num)
parts[1] += '8'
video_url = urljoin(url, '/'.join(parts))
title = (self._search_regex( title = (self._search_regex(
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
@ -54,4 +63,5 @@ class YourPornIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'age_limit': 18, 'age_limit': 18,
'ext': 'mp4',
} }

View file

@ -4,10 +4,20 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
dict_get,
ExtractorError,
int_or_none,
js_to_json,
parse_iso8601,
)
class ZypeIE(InfoExtractor): class ZypeIE(InfoExtractor):
_VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+' _ID_RE = r'[\da-fA-F]+'
_COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
_VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
_TEST = { _TEST = {
'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false', 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
'md5': 'eaee31d474c76a955bdaba02a505c595', 'md5': 'eaee31d474c76a955bdaba02a505c595',
@ -16,6 +26,9 @@ class ZypeIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Smoky Barbecue Favorites', 'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g', 'thumbnail': r're:^https?://.*\.jpe?g',
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
'timestamp': 1504915200,
'upload_date': '20170909',
}, },
} }
@ -24,34 +37,98 @@ class ZypeIE(InfoExtractor):
return [ return [
mobj.group('url') mobj.group('url')
for mobj in re.finditer( for mobj in re.finditer(
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1', r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) try:
response = self._download_json(re.sub(
r'\.(?:js|html)\?', '.json?', url), video_id)['response']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
raise ExtractorError(self._parse_json(
e.cause.read().decode(), video_id)['message'], expected=True)
raise
title = self._search_regex( body = response['body']
r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, video = response['video']
'title', group='value') title = video['title']
m3u8_url = self._search_regex( if isinstance(body, dict):
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage, formats = []
'm3u8 url', group='url') for output in body.get('outputs', []):
output_url = output.get('url')
formats = self._extract_m3u8_formats( if not output_url:
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', continue
m3u8_id='hls') name = output.get('name')
if name == 'm3u8':
formats = self._extract_m3u8_formats(
output_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
else:
f = {
'format_id': name,
'tbr': int_or_none(output.get('bitrate')),
'url': output_url,
}
if name in ('m4a', 'mp3'):
f['vcodec'] = 'none'
else:
f.update({
'height': int_or_none(output.get('height')),
'width': int_or_none(output.get('width')),
})
formats.append(f)
text_tracks = body.get('subtitles') or []
else:
m3u8_url = self._search_regex(
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
body, 'm3u8 url', group='url')
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
text_tracks = self._search_regex(
r'textTracks\s*:\s*(\[[^]]+\])',
body, 'text tracks', default=None)
if text_tracks:
text_tracks = self._parse_json(
text_tracks, video_id, js_to_json, False)
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = self._search_regex( subtitles = {}
r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail', if text_tracks:
default=False, group='url') for text_track in text_tracks:
tt_url = dict_get(text_track, ('file', 'src'))
if not tt_url:
continue
subtitles.setdefault(text_track.get('label') or 'English', []).append({
'url': tt_url,
})
thumbnails = []
for thumbnail in video.get('thumbnails', []):
thumbnail_url = thumbnail.get('url')
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
return { return {
'id': video_id, 'id': video_id,
'display_id': video.get('friendly_title'),
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnails': thumbnails,
'description': dict_get(video, ('description', 'ott_description', 'short_description')),
'timestamp': parse_iso8601(video.get('published_at')),
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('request_count')),
'average_rating': int_or_none(video.get('rating')),
'season_number': int_or_none(video.get('season')),
'episode_number': int_or_none(video.get('episode')),
'formats': formats, 'formats': formats,
'subtitles': subtitles,
} }

View file

@ -46,6 +46,7 @@ from .compat import (
compat_html_entities, compat_html_entities,
compat_html_entities_html5, compat_html_entities_html5,
compat_http_client, compat_http_client,
compat_integer_types,
compat_kwargs, compat_kwargs,
compat_os_name, compat_os_name,
compat_parse_qs, compat_parse_qs,
@ -3519,10 +3520,11 @@ def str_or_none(v, default=None):
def str_to_int(int_str): def str_to_int(int_str):
""" A more relaxed version of int_or_none """ """ A more relaxed version of int_or_none """
if not isinstance(int_str, compat_str): if isinstance(int_str, compat_integer_types):
return int_str return int_str
int_str = re.sub(r'[,\.\+]', '', int_str) elif isinstance(int_str, compat_str):
return int(int_str) int_str = re.sub(r'[,\.\+]', '', int_str)
return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None): def float_or_none(v, scale=1, invscale=1, default=None):

View file

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2019.11.28' __version__ = '2020.01.15'