mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 03:32:05 +00:00
Merge remote-tracking branch 'upstream/master' into binustvextractor
This commit is contained in:
commit
a1ce32fe2a
149 changed files with 7864 additions and 4479 deletions
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
|
@ -18,7 +18,7 @@ title: ''
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.09. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
|
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support
|
- [ ] I'm reporting a broken site support
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.03.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.12.09**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||||
|
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2020.03.24
|
[debug] youtube-dl version 2020.12.09
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
|
|
@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.09. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
|
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a new site support request
|
- [ ] I'm reporting a new site support request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.03.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.12.09**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||||
|
|
|
@ -18,13 +18,13 @@ title: ''
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.09. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a site feature request
|
- [ ] I'm reporting a site feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.03.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.12.09**
|
||||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
|
@ -18,7 +18,7 @@ title: ''
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.09. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
|
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support issue
|
- [ ] I'm reporting a broken site support issue
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.03.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.12.09**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||||
|
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2020.03.24
|
[debug] youtube-dl version 2020.12.09
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
|
@ -19,13 +19,13 @@ labels: 'request'
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.09. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a feature request
|
- [ ] I'm reporting a feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.03.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.12.09**
|
||||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
|
26
.travis.yml
26
.travis.yml
|
@ -12,29 +12,29 @@ python:
|
||||||
dist: trusty
|
dist: trusty
|
||||||
env:
|
env:
|
||||||
- YTDL_TEST_SET=core
|
- YTDL_TEST_SET=core
|
||||||
- YTDL_TEST_SET=download
|
# - YTDL_TEST_SET=download
|
||||||
jobs:
|
jobs:
|
||||||
include:
|
include:
|
||||||
- python: 3.7
|
- python: 3.7
|
||||||
dist: xenial
|
dist: xenial
|
||||||
env: YTDL_TEST_SET=core
|
env: YTDL_TEST_SET=core
|
||||||
- python: 3.7
|
# - python: 3.7
|
||||||
dist: xenial
|
# dist: xenial
|
||||||
env: YTDL_TEST_SET=download
|
# env: YTDL_TEST_SET=download
|
||||||
- python: 3.8
|
- python: 3.8
|
||||||
dist: xenial
|
dist: xenial
|
||||||
env: YTDL_TEST_SET=core
|
env: YTDL_TEST_SET=core
|
||||||
- python: 3.8
|
# - python: 3.8
|
||||||
dist: xenial
|
# dist: xenial
|
||||||
env: YTDL_TEST_SET=download
|
# env: YTDL_TEST_SET=download
|
||||||
- python: 3.8-dev
|
- python: 3.8-dev
|
||||||
dist: xenial
|
dist: xenial
|
||||||
env: YTDL_TEST_SET=core
|
env: YTDL_TEST_SET=core
|
||||||
- python: 3.8-dev
|
# - python: 3.8-dev
|
||||||
dist: xenial
|
# dist: xenial
|
||||||
env: YTDL_TEST_SET=download
|
# env: YTDL_TEST_SET=download
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||||
- name: flake8
|
- name: flake8
|
||||||
python: 3.8
|
python: 3.8
|
||||||
dist: xenial
|
dist: xenial
|
||||||
|
@ -42,9 +42,9 @@ jobs:
|
||||||
script: flake8 .
|
script: flake8 .
|
||||||
fast_finish: true
|
fast_finish: true
|
||||||
allow_failures:
|
allow_failures:
|
||||||
- env: YTDL_TEST_SET=download
|
# - env: YTDL_TEST_SET=download
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||||
before_install:
|
before_install:
|
||||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||||
script: ./devscripts/run_tests.sh
|
script: ./devscripts/run_tests.sh
|
||||||
|
|
|
@ -153,7 +153,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
|
||||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
|
|
478
ChangeLog
478
ChangeLog
|
@ -1,3 +1,447 @@
|
||||||
|
version 2020.12.09
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Fix inline HTML5 media tags processing (#27345)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube:tab] Improve identity token extraction (#27197)
|
||||||
|
* [youtube:tab] Make click tracking params on continuation optional
|
||||||
|
* [youtube:tab] Delegate inline playlists to tab-based playlists (#27298)
|
||||||
|
+ [tubitv] Extract release year (#27317)
|
||||||
|
* [amcnetworks] Fix free content extraction (#20354)
|
||||||
|
+ [lbry:channel] Add support for channels (#25584)
|
||||||
|
+ [lbry] Add support for short and embed URLs
|
||||||
|
* [lbry] Fix channel metadata extraction
|
||||||
|
+ [telequebec] Add support for video.telequebec.tv (#27339)
|
||||||
|
* [telequebec] Fix extraction (#25733, #26883)
|
||||||
|
+ [youtube:tab] Capture and output alerts (#27340)
|
||||||
|
* [tvplay:home] Fix extraction (#21153)
|
||||||
|
* [americastestkitchen] Fix Extraction and add support
|
||||||
|
for Cook's Country and Cook's Illustrated (#17234, #27322)
|
||||||
|
+ [slideslive] Add support for yoda service videos and extract subtitles
|
||||||
|
(#27323)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.12.07
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Extract timestamp from Last-Modified header
|
||||||
|
+ [extractor/common] Add support for dl8-* media tags (#27283)
|
||||||
|
* [extractor/common] Fix media type extraction for HTML5 media tags
|
||||||
|
in start/end form
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [aenetworks] Fix extraction (#23363, #23390, #26795, #26985)
|
||||||
|
* Fix Fastly format extraction
|
||||||
|
+ Add support for play and watch subdomains
|
||||||
|
+ Extract series metadata
|
||||||
|
* [youtube] Improve youtu.be extraction in non-existing playlists (#27324)
|
||||||
|
+ [generic] Extract RSS video description, timestamp and itunes metadata
|
||||||
|
(#27177)
|
||||||
|
* [nrk] Reduce the number of instalments and episodes requests
|
||||||
|
* [nrk] Improve extraction
|
||||||
|
* Improve format extraction for old akamai formats
|
||||||
|
+ Add is_live value to entry info dict
|
||||||
|
* Request instalments only when available
|
||||||
|
* Fix skole extraction
|
||||||
|
+ [peertube] Extract fps
|
||||||
|
+ [peertube] Recognize audio-only formats (#27295)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.12.05
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Improve Akamai HTTP format extraction
|
||||||
|
* Allow m3u8 manifest without an additional audio format
|
||||||
|
* Fix extraction for qualities starting with a number
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [teachable:course] Improve extraction (#24507, #27286)
|
||||||
|
* [nrk] Improve error extraction
|
||||||
|
* [nrktv:series] Improve extraction (#21926)
|
||||||
|
* [nrktv:season] Improve extraction
|
||||||
|
* [nrk] Improve format extraction and geo-restriction detection (#24221)
|
||||||
|
* [pornhub] Handle HTTP errors gracefully (#26414)
|
||||||
|
* [nrktv] Relax URL regular expression (#27299, #26185)
|
||||||
|
+ [zdf] Extract webm formats (#26659)
|
||||||
|
+ [gamespot] Extract DASH and HTTP formats
|
||||||
|
+ [tver] Add support for tver.jp (#26662, #27284)
|
||||||
|
+ [pornhub] Add support for pornhub.org (#27276)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.12.02
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [tva] Add support for qub.ca (#27235)
|
||||||
|
+ [toggle] Detect DRM protected videos (closes #16479)(closes #20805)
|
||||||
|
+ [toggle] Add support for new MeWatch URLs (#27256)
|
||||||
|
* [youtube:tab] Extract channels only from channels tab (#27266)
|
||||||
|
+ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030,
|
||||||
|
#18806, #23148, #24461, #26171, #26800, #27263)
|
||||||
|
* [cspan] Pass Referer header with format's video URL (#26032, #25729)
|
||||||
|
* [youtube] Improve age-gated videos extraction (#27259)
|
||||||
|
+ [mediaset] Add support for movie URLs (#27240)
|
||||||
|
* [yandexmusic] Refactor
|
||||||
|
+ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284)
|
||||||
|
* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.29
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Write static debug to stderr and respect quiet for dynamic debug
|
||||||
|
(#14579, #22593)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [drtv] Extend URL regular expression (#27243)
|
||||||
|
* [tiktok] Fix extraction (#20809, #22838, #22850, #25987, #26281, #26411,
|
||||||
|
#26639, #26776, #27237)
|
||||||
|
+ [ina] Add support for mobile URLs (#27229)
|
||||||
|
* [pornhub] Fix like and dislike count extraction (#27227, #27234)
|
||||||
|
* [youtube] Improve yt initial player response extraction (#27216)
|
||||||
|
* [videa] Fix extraction (#25650, #25973, #26301)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.26
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/fragment] Set final file's mtime according to last fragment's
|
||||||
|
Last-Modified header (#11718, #18384, #27138)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [spreaker] Add support for spreaker.com (#13480, #13877)
|
||||||
|
* [vlive] Improve extraction for geo-restricted videos
|
||||||
|
+ [vlive] Add support for post URLs (#27122, #27123)
|
||||||
|
* [viki] Fix video API request (#27184)
|
||||||
|
* [bbc] Fix BBC Three clip extraction
|
||||||
|
* [bbc] Fix BBC News videos extraction
|
||||||
|
+ [medaltv] Add support for medal.tv (#27149)
|
||||||
|
* [youtube] Improve music metadata and license extraction (#26013)
|
||||||
|
* [nrk] Fix extraction
|
||||||
|
* [cda] Fix extraction (#17803, #24458, #24518, #26381)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.24
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Add generic support for akamai HTTP format extraction
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube:tab] Fix feeds extraction (#25695, #26452)
|
||||||
|
* [youtube:favorites] Restore extractor
|
||||||
|
* [youtube:tab] Fix some weird typo (#27157)
|
||||||
|
+ [pinterest] Add support for large collections (more than 25 pins)
|
||||||
|
+ [franceinter] Extract thumbnail (#27153)
|
||||||
|
+ [box] Add support for box.com (#5949)
|
||||||
|
+ [nytimes] Add support for cooking.nytimes.com (#27112, #27143)
|
||||||
|
* [lbry] Relax URL regular expression (#27144)
|
||||||
|
+ [rumble] Add support for embed pages (#10785)
|
||||||
|
+ [skyit] Add support for multiple Sky Italia websites (#26629)
|
||||||
|
+ [pinterest] Add support for pinterest.com (#25747)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.21.1
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/http] Fix crash during urlopen caused by missing reason
|
||||||
|
of URLError
|
||||||
|
* [YoutubeDL] Fix --ignore-errors for playlists with generator-based entries
|
||||||
|
of url_transparent (#27064)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [svtplay] Add support for svt.se/barnkanalen (#24817)
|
||||||
|
+ [svt] Extract timestamp (#27130)
|
||||||
|
* [svtplay] Improve thumbnail extraction (#27130)
|
||||||
|
* [youtube] Fix error reason extraction (#27081)
|
||||||
|
* [youtube] Fix like and dislike count extraction (#25977)
|
||||||
|
+ [youtube:tab] Add support for current video and fix lives extraction (#27126)
|
||||||
|
* [infoq] Fix format extraction (#25984)
|
||||||
|
* [francetv] Update to fix thumbnail URL issue (#27120)
|
||||||
|
* [youtube] Improve yt initial data extraction (#27093)
|
||||||
|
+ [discoverynetworks] Add support for new TLC/DMAX URLs (#27100)
|
||||||
|
* [rai] Fix protocol relative relinker URLs (#22766)
|
||||||
|
* [rai] Fix unavailable video format detection
|
||||||
|
* [rai] Improve extraction
|
||||||
|
* [rai] Fix extraction (#27077)
|
||||||
|
* [viki] Improve format extraction
|
||||||
|
* [viki] Fix stream extraction from MPD (#27092)
|
||||||
|
* [googledrive] Fix format extraction (#26979)
|
||||||
|
+ [amara] Add support for amara.org (#20618)
|
||||||
|
* [vimeo:album] Fix extraction (#27079)
|
||||||
|
* [mtv] Fix mgid extraction (#26841)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.19
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Output error for invalid URLs in _is_valid_url (#21400,
|
||||||
|
#24151, #25617, #25618, #25586, #26068, #27072)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youporn] Fix upload date extraction
|
||||||
|
* [youporn] Make comment count optional (#26986)
|
||||||
|
* [arte] Rework extractors
|
||||||
|
* Reimplement embed and playlist extractors to delegate to the single
|
||||||
|
entrypoint artetv extractor
|
||||||
|
* Improve embeds detection (#27057)
|
||||||
|
+ [arte] Extract m3u8 formats (#27061)
|
||||||
|
* [mgtv] Fix format extraction (#26415)
|
||||||
|
+ [lbry] Add support for odysee.com (#26806)
|
||||||
|
* [francetv] Improve info extraction
|
||||||
|
+ [francetv] Add fallback video URL extraction (#27047)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [spiegel] Fix extraction (#24206, #24767)
|
||||||
|
* [youtube] Improve extraction
|
||||||
|
+ Add support for --no-playlist (#27009)
|
||||||
|
* Improve playlist and mix extraction (#26390, #26509, #26534, #27011)
|
||||||
|
+ Extract playlist uploader data
|
||||||
|
* [youtube:tab] Fix view count extraction (#27051)
|
||||||
|
* [malltv] Fix extraction (#27035)
|
||||||
|
+ [bandcamp] Extract playlist description (#22684)
|
||||||
|
* [urplay] Fix extraction (#26828)
|
||||||
|
* [youtube:tab] Fix playlist title extraction (#27015)
|
||||||
|
* [youtube] Fix chapters extraction (#26005)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.17
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Skip ! prefixed code in js_to_json
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube:tab] Fix extraction with cookies provided (#27005)
|
||||||
|
* [lrt] Fix extraction with empty tags (#20264)
|
||||||
|
+ [ndr:embed:base] Extract subtitles (#25447, #26106)
|
||||||
|
+ [servus] Add support for pm-wissen.com (#25869)
|
||||||
|
* [servus] Fix extraction (#26872, #26967, #26983, #27000)
|
||||||
|
* [xtube] Fix extraction (#26996)
|
||||||
|
* [lrt] Fix extraction
|
||||||
|
+ [lbry] Add support for lbry.tv
|
||||||
|
+ [condenast] Extract subtitles
|
||||||
|
* [condenast] Fix extraction
|
||||||
|
* [bandcamp] Fix extraction (#26681, #26684)
|
||||||
|
* [rai] Fix RaiPlay extraction (#26064, #26096)
|
||||||
|
* [vlive] Fix extraction
|
||||||
|
* [usanetwork] Fix extraction
|
||||||
|
* [nbc] Fix NBCNews/Today/MSNBC extraction
|
||||||
|
* [cnbc] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.12
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Rework extractors
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.01
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
|
||||||
|
* [downloader/http] Properly handle missing message in SSLError (#26646)
|
||||||
|
* [downloader/http] Fix access to not yet opened stream in retry
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix JS player URL extraction
|
||||||
|
* [ytsearch] Fix extraction (#26920)
|
||||||
|
* [afreecatv] Fix typo (#26970)
|
||||||
|
* [23video] Relax URL regular expression (#26870)
|
||||||
|
+ [ustream] Add support for video.ibm.com (#26894)
|
||||||
|
* [iqiyi] Fix typo (#26884)
|
||||||
|
+ [expressen] Add support for di.se (#26670)
|
||||||
|
* [iprima] Improve video id extraction (#26507, #26494)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.09.20
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Relax interaction count extraction in _json_ld
|
||||||
|
+ [extractor/common] Extract author as uploader for VideoObject in _json_ld
|
||||||
|
* [downloader/hls] Fix incorrect end byte in Range HTTP header for
|
||||||
|
media segments with EXT-X-BYTERANGE (#14748, #24512)
|
||||||
|
* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601)
|
||||||
|
* [downloader/http] Improve timeout detection when reading block of data
|
||||||
|
(#10935)
|
||||||
|
* [downloader/http] Retry download when urlopen times out (#10935, #26603)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [redtube] Extend URL regular expression (#26506)
|
||||||
|
* [twitch] Refactor
|
||||||
|
* [twitch:stream] Switch to GraphQL and fix reruns (#26535)
|
||||||
|
+ [telequebec] Add support for brightcove videos (#25833)
|
||||||
|
* [pornhub] Extract metadata from JSON-LD (#26614)
|
||||||
|
* [pornhub] Fix view count extraction (#26621, #26614)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.09.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails
|
||||||
|
(#25687, #25717)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [rtlnl] Extend URL regular expression (#26549, #25821)
|
||||||
|
* [youtube] Fix empty description extraction (#26575, #26006)
|
||||||
|
* [srgssr] Extend URL regular expression (#26555, #26556, #26578)
|
||||||
|
* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689,
|
||||||
|
#26565)
|
||||||
|
* [svtplay] Fix id extraction (#26576)
|
||||||
|
* [redbulltv] Improve support for redbull.com TV localized URLs (#22063)
|
||||||
|
+ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063)
|
||||||
|
* [soundcloud:pagedplaylist] Reduce pagination limit (#26557)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.09.06
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Recognize wav mimetype (#26463)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409)
|
||||||
|
* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384)
|
||||||
|
* [youtube:user] Extend URL regular expression (#26443)
|
||||||
|
* [xhamster] Improve initials regular expression (#26526, #26353)
|
||||||
|
* [svtplay] Fix video id extraction (#26425, #26428, #26438)
|
||||||
|
* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979,
|
||||||
|
#24263, #25010, #25553, #25606)
|
||||||
|
* Switch to GraphQL
|
||||||
|
+ Add support for collections
|
||||||
|
+ Add support for clips and collections playlists
|
||||||
|
* [biqle] Improve video ext extraction
|
||||||
|
* [xhamster] Fix extraction (#26157, #26254)
|
||||||
|
* [xhamster] Extend URL regular expression (#25789, #25804, #25927)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.07.28
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
|
||||||
|
* [youtube] Improve description extraction (#25937, #25980)
|
||||||
|
* [wistia] Restrict embed regular expression (#25969)
|
||||||
|
* [youtube] Prevent excess HTTP 301 (#25786)
|
||||||
|
+ [youtube:playlists] Extend URL regular expression (#25810)
|
||||||
|
+ [bellmedia] Add support for cp24.com clip URLs (#25764)
|
||||||
|
* [brightcove] Improve embed detection (#25674)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.06.16.1
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Force old layout (#25682, #25683, #25680, #25686)
|
||||||
|
* [youtube] Fix categories and improve tags extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.06.16
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix uploader id and uploader URL extraction
|
||||||
|
* [youtube] Improve view count extraction
|
||||||
|
* [youtube] Fix upload date extraction (#25677)
|
||||||
|
* [youtube] Fix thumbnails extraction (#25676)
|
||||||
|
* [youtube] Fix playlist and feed extraction (#25675)
|
||||||
|
+ [facebook] Add support for single-video ID links
|
||||||
|
+ [youtube] Extract chapters from JSON (#24819)
|
||||||
|
+ [kaltura] Add support for multiple embeds on a webpage (#25523)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.06.06
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [tele5] Bypass geo restriction
|
||||||
|
+ [jwplatform] Add support for bypass geo restriction
|
||||||
|
* [tele5] Prefer jwplatform over nexx (#25533)
|
||||||
|
* [twitch:stream] Expect 400 and 410 HTTP errors from API
|
||||||
|
* [twitch:stream] Fix extraction (#25528)
|
||||||
|
* [twitch] Fix thumbnails extraction (#25531)
|
||||||
|
+ [twitch] Pass v5 Accept HTTP header (#25531)
|
||||||
|
* [brightcove] Fix subtitles extraction (#25540)
|
||||||
|
+ [malltv] Add support for sk.mall.tv (#25445)
|
||||||
|
* [periscope] Fix untitled broadcasts (#25482)
|
||||||
|
* [jwplatform] Improve embeds extraction (#25467)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.05.29
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [postprocessor/ffmpeg] Embed series metadata with --add-metadata
|
||||||
|
* [utils] Fix file permissions in write_json_file (#12471, #25122)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [ard:beta] Extend URL regular expression (#25405)
|
||||||
|
+ [youtube] Add support for more invidious instances (#25417)
|
||||||
|
* [giantbomb] Extend URL regular expression (#25222)
|
||||||
|
* [ard] Improve URL regular expression (#25134, #25198)
|
||||||
|
* [redtube] Improve formats extraction and extract m3u8 formats (#25311,
|
||||||
|
#25321)
|
||||||
|
* [indavideo] Switch to HTTPS for API request (#25191)
|
||||||
|
* [redtube] Improve title extraction (#25208)
|
||||||
|
* [vimeo] Improve format extraction and sorting (#25285)
|
||||||
|
* [soundcloud] Reduce API playlist page limit (#25274)
|
||||||
|
+ [youtube] Add support for yewtu.be (#25226)
|
||||||
|
* [mailru] Fix extraction (#24530, #25239)
|
||||||
|
* [bellator] Fix mgid extraction (#25195)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.05.08
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/http] Request last data block of exact remaining size
|
||||||
|
* [downloader/http] Finish downloading once received data length matches
|
||||||
|
expected
|
||||||
|
* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
|
||||||
|
ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
|
||||||
|
+ [compat] Introduce compat_cookiejar_Cookie
|
||||||
|
* [utils] Improve cookie files support
|
||||||
|
+ Add support for UTF-8 in cookie files
|
||||||
|
* Skip malformed cookie file entries instead of crashing (invalid entry
|
||||||
|
length, invalid expires at)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Improve signature cipher extraction (#25187, #25188)
|
||||||
|
* [iprima] Improve extraction (#25138)
|
||||||
|
* [uol] Fix extraction (#22007)
|
||||||
|
+ [orf] Add support for more radio stations (#24938, #24968)
|
||||||
|
* [dailymotion] Fix typo
|
||||||
|
- [puhutv] Remove no longer available HTTP formats (#25124)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.05.03
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Extract multiple JSON-LD entries
|
||||||
|
* [options] Clarify doc on --exec command (#19087, #24883)
|
||||||
|
* [extractor/common] Skip malformed ISM manifest XMLs while extracting
|
||||||
|
ISM formats (#24667)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [crunchyroll] Fix and improve extraction (#25096, #25060)
|
||||||
|
* [youtube] Improve player id extraction
|
||||||
|
* [youtube] Use redirected video id if any (#25063)
|
||||||
|
* [yahoo] Fix GYAO Player extraction and relax URL regular expression
|
||||||
|
(#24178, #24778)
|
||||||
|
* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
|
||||||
|
* [tenplay] Relax URL regular expression (#25001)
|
||||||
|
+ [prosiebensat1] Extract series metadata
|
||||||
|
* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
|
||||||
|
- [prosiebensat1] Remove 7tv.de support (#24948)
|
||||||
|
* [youtube] Fix DRM videos detection (#24736)
|
||||||
|
* [thisoldhouse] Fix video id extraction (#24548, #24549)
|
||||||
|
+ [soundcloud] Extract AAC format (#19173, #24708)
|
||||||
|
* [youtube] Skip broken multifeed videos (#24711)
|
||||||
|
* [nova:embed] Fix extraction (#24700)
|
||||||
|
* [motherless] Fix extraction (#24699)
|
||||||
|
* [twitch:clips] Extend URL regular expression (#24290, #24642)
|
||||||
|
* [tv4] Fix ISM formats extraction (#24667)
|
||||||
|
* [tele5] Fix extraction (#24553)
|
||||||
|
+ [mofosex] Add support for generic embeds (#24633)
|
||||||
|
+ [youporn] Add support for generic embeds
|
||||||
|
+ [spankwire] Add support for generic embeds (#24633)
|
||||||
|
* [spankwire] Fix extraction (#18924, #20648)
|
||||||
|
|
||||||
|
|
||||||
version 2020.03.24
|
version 2020.03.24
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@ -96,7 +540,7 @@ Extractors
|
||||||
+ Add support for more domains
|
+ Add support for more domains
|
||||||
* [svt] Fix series extraction (#22297)
|
* [svt] Fix series extraction (#22297)
|
||||||
* [svt] Fix article extraction (#22897, #22919)
|
* [svt] Fix article extraction (#22897, #22919)
|
||||||
* [soundcloud] Imporve private playlist/set tracks extraction (#3707)
|
* [soundcloud] Improve private playlist/set tracks extraction (#3707)
|
||||||
|
|
||||||
|
|
||||||
version 2020.01.24
|
version 2020.01.24
|
||||||
|
@ -222,7 +666,7 @@ Extractors
|
||||||
* [abcotvs] Relax URL regular expression and improve metadata extraction
|
* [abcotvs] Relax URL regular expression and improve metadata extraction
|
||||||
(#18014)
|
(#18014)
|
||||||
* [channel9] Reduce response size
|
* [channel9] Reduce response size
|
||||||
* [adobetv] Improve extaction
|
* [adobetv] Improve extraction
|
||||||
* Use OnDemandPagedList for list extractors
|
* Use OnDemandPagedList for list extractors
|
||||||
* Reduce show extraction requests
|
* Reduce show extraction requests
|
||||||
* Extract original video format and subtitles
|
* Extract original video format and subtitles
|
||||||
|
@ -247,7 +691,7 @@ Extractors
|
||||||
* [dailymotion] Improve extraction
|
* [dailymotion] Improve extraction
|
||||||
* Extract http formats included in m3u8 manifest
|
* Extract http formats included in m3u8 manifest
|
||||||
* Fix user extraction (#3553, #21415)
|
* Fix user extraction (#3553, #21415)
|
||||||
+ Add suport for User Authentication (#11491)
|
+ Add support for User Authentication (#11491)
|
||||||
* Fix password protected videos extraction (#23176)
|
* Fix password protected videos extraction (#23176)
|
||||||
* Respect age limit option and family filter cookie value (#18437)
|
* Respect age limit option and family filter cookie value (#18437)
|
||||||
* Handle video url playlist query param
|
* Handle video url playlist query param
|
||||||
|
@ -332,7 +776,7 @@ Extractors
|
||||||
- [go90] Remove extractor
|
- [go90] Remove extractor
|
||||||
* [kakao] Remove raw request
|
* [kakao] Remove raw request
|
||||||
+ [kakao] Extract format total bitrate
|
+ [kakao] Extract format total bitrate
|
||||||
* [daum] Fix VOD and Clip extracton (#15015)
|
* [daum] Fix VOD and Clip extraction (#15015)
|
||||||
* [kakao] Improve extraction
|
* [kakao] Improve extraction
|
||||||
+ Add support for embed URLs
|
+ Add support for embed URLs
|
||||||
+ Add support for Kakao Legacy vid based embed URLs
|
+ Add support for Kakao Legacy vid based embed URLs
|
||||||
|
@ -376,7 +820,7 @@ Extractors
|
||||||
* Improve format extraction (#22123)
|
* Improve format extraction (#22123)
|
||||||
+ Extract uploader_id and uploader_url (#21916)
|
+ Extract uploader_id and uploader_url (#21916)
|
||||||
+ Extract all known thumbnails (#19071, #20659)
|
+ Extract all known thumbnails (#19071, #20659)
|
||||||
* Fix extration for private playlists (#20976)
|
* Fix extraction for private playlists (#20976)
|
||||||
+ Add support for playlist embeds (#20976)
|
+ Add support for playlist embeds (#20976)
|
||||||
* Skip preview formats (#22806)
|
* Skip preview formats (#22806)
|
||||||
* [dplay] Improve extraction
|
* [dplay] Improve extraction
|
||||||
|
@ -851,7 +1295,7 @@ Extractors
|
||||||
* [hbo] Fix extraction and extract subtitles (#14629, #13709)
|
* [hbo] Fix extraction and extract subtitles (#14629, #13709)
|
||||||
* [youtube] Extract srv[1-3] subtitle formats (#20566)
|
* [youtube] Extract srv[1-3] subtitle formats (#20566)
|
||||||
* [adultswim] Fix extraction (#18025)
|
* [adultswim] Fix extraction (#18025)
|
||||||
* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339)
|
* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
|
||||||
* [adn] Fix subtitle compatibility with ffmpeg
|
* [adn] Fix subtitle compatibility with ffmpeg
|
||||||
* [adn] Fix extraction and add support for positioning styles (#20549)
|
* [adn] Fix extraction and add support for positioning styles (#20549)
|
||||||
* [vk] Use unique video id (#17848)
|
* [vk] Use unique video id (#17848)
|
||||||
|
@ -1263,7 +1707,7 @@ version 2018.11.18
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
+ [wwe] Extract subtitles
|
+ [wwe] Extract subtitles
|
||||||
+ [wwe] Add support for playlistst (#14781)
|
+ [wwe] Add support for playlists (#14781)
|
||||||
+ [wwe] Add support for wwe.com (#14781, #17450)
|
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||||
* [vk] Detect geo restriction (#17767)
|
* [vk] Detect geo restriction (#17767)
|
||||||
* [openload] Use original host during extraction (#18211)
|
* [openload] Use original host during extraction (#18211)
|
||||||
|
@ -2296,7 +2740,7 @@ Extractors
|
||||||
* [youku] Update ccode (#14872)
|
* [youku] Update ccode (#14872)
|
||||||
* [mnet] Fix format extraction (#14883)
|
* [mnet] Fix format extraction (#14883)
|
||||||
+ [xiami] Add Referer header to API request
|
+ [xiami] Add Referer header to API request
|
||||||
* [mtv] Correct scc extention in extracted subtitles (#13730)
|
* [mtv] Correct scc extension in extracted subtitles (#13730)
|
||||||
* [vvvvid] Fix extraction for kenc videos (#13406)
|
* [vvvvid] Fix extraction for kenc videos (#13406)
|
||||||
+ [br] Add support for BR Mediathek videos (#14560, #14788)
|
+ [br] Add support for BR Mediathek videos (#14560, #14788)
|
||||||
+ [daisuki] Add support for motto.daisuki.com (#14681)
|
+ [daisuki] Add support for motto.daisuki.com (#14681)
|
||||||
|
@ -2317,7 +2761,7 @@ Extractors
|
||||||
* [nexx] Extract more formats
|
* [nexx] Extract more formats
|
||||||
+ [openload] Add support for openload.link (#14763)
|
+ [openload] Add support for openload.link (#14763)
|
||||||
* [empflix] Relax URL regular expression
|
* [empflix] Relax URL regular expression
|
||||||
* [empflix] Fix extractrion
|
* [empflix] Fix extraction
|
||||||
* [tnaflix] Don't modify download URLs (#14811)
|
* [tnaflix] Don't modify download URLs (#14811)
|
||||||
- [gamersyde] Remove extractor
|
- [gamersyde] Remove extractor
|
||||||
* [francetv:generationwhat] Fix extraction
|
* [francetv:generationwhat] Fix extraction
|
||||||
|
@ -2512,7 +2956,7 @@ Extractors
|
||||||
* [yahoo] Bypass geo restriction for brightcove (#14210)
|
* [yahoo] Bypass geo restriction for brightcove (#14210)
|
||||||
* [yahoo] Use extracted brightcove account id (#14210)
|
* [yahoo] Use extracted brightcove account id (#14210)
|
||||||
* [rtve:alacarta] Fix extraction (#14290)
|
* [rtve:alacarta] Fix extraction (#14290)
|
||||||
+ [yahoo] Add support for custom brigthcove embeds (#14210)
|
+ [yahoo] Add support for custom brightcove embeds (#14210)
|
||||||
+ [generic] Add support for Video.js embeds
|
+ [generic] Add support for Video.js embeds
|
||||||
+ [gfycat] Add support for /gifs/detail URLs (#14322)
|
+ [gfycat] Add support for /gifs/detail URLs (#14322)
|
||||||
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
|
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
|
||||||
|
@ -2757,7 +3201,7 @@ Extractors
|
||||||
* [amcnetworks] Make rating optional (#12453)
|
* [amcnetworks] Make rating optional (#12453)
|
||||||
* [cloudy] Fix extraction (#13737)
|
* [cloudy] Fix extraction (#13737)
|
||||||
+ [nickru] Add support for nickelodeon.ru
|
+ [nickru] Add support for nickelodeon.ru
|
||||||
* [mtv] Improve thumbnal extraction
|
* [mtv] Improve thumbnail extraction
|
||||||
* [nick] Automate geo-restriction bypass (#13711)
|
* [nick] Automate geo-restriction bypass (#13711)
|
||||||
* [niconico] Improve error reporting (#13696)
|
* [niconico] Improve error reporting (#13696)
|
||||||
|
|
||||||
|
@ -3121,7 +3565,7 @@ Extractors
|
||||||
+ [cda] Support birthday verification (#12789)
|
+ [cda] Support birthday verification (#12789)
|
||||||
* [leeco] Fix extraction (#12974)
|
* [leeco] Fix extraction (#12974)
|
||||||
+ [pbs] Extract chapters
|
+ [pbs] Extract chapters
|
||||||
* [amp] Imporove thumbnail and subtitles extraction
|
* [amp] Improve thumbnail and subtitles extraction
|
||||||
* [foxsports] Fix extraction (#12945)
|
* [foxsports] Fix extraction (#12945)
|
||||||
- [coub] Remove comment count extraction (#12941)
|
- [coub] Remove comment count extraction (#12941)
|
||||||
|
|
||||||
|
@ -3291,7 +3735,7 @@ Extractors
|
||||||
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
|
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
|
||||||
+ [npo:live] Add support for default URL (#12555)
|
+ [npo:live] Add support for default URL (#12555)
|
||||||
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
|
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
|
||||||
+ [thesun] Add suport for thesun.co.uk (#11298, #12674)
|
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
|
||||||
+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
|
+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
|
||||||
* [ceskatelevize] Improve extraction and remove URL replacement hacks
|
* [ceskatelevize] Improve extraction and remove URL replacement hacks
|
||||||
+ [kaltura] Add support for iframe embeds (#12679)
|
+ [kaltura] Add support for iframe embeds (#12679)
|
||||||
|
@ -3330,7 +3774,7 @@ Extractors
|
||||||
* [funimation] Fix extraction (#10696, #11773)
|
* [funimation] Fix extraction (#10696, #11773)
|
||||||
+ [xfileshare] Add support for vidabc.com (#12589)
|
+ [xfileshare] Add support for vidabc.com (#12589)
|
||||||
+ [xfileshare] Improve extraction and extract hls formats
|
+ [xfileshare] Improve extraction and extract hls formats
|
||||||
+ [crunchyroll] Pass geo verifcation proxy
|
+ [crunchyroll] Pass geo verification proxy
|
||||||
+ [cwtv] Extract ISM formats
|
+ [cwtv] Extract ISM formats
|
||||||
+ [tvplay] Bypass geo restriction
|
+ [tvplay] Bypass geo restriction
|
||||||
+ [vrv] Add support for vrv.co
|
+ [vrv] Add support for vrv.co
|
||||||
|
@ -3394,7 +3838,7 @@ Extractors
|
||||||
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
|
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
|
||||||
+ [toongoggles] Add support for toongoggles.com (#12171)
|
+ [toongoggles] Add support for toongoggles.com (#12171)
|
||||||
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
|
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
|
||||||
+ [discoverynetworks] Add support for more domains and bypass geo restiction
|
+ [discoverynetworks] Add support for more domains and bypass geo restriction
|
||||||
* [openload] Fix extraction (#10408)
|
* [openload] Fix extraction (#10408)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4984,7 +5428,7 @@ version 2016.07.09.1
|
||||||
Fixed/improved extractors
|
Fixed/improved extractors
|
||||||
- youtube
|
- youtube
|
||||||
- ard
|
- ard
|
||||||
- srmediatek (#9373)
|
- srmediathek (#9373)
|
||||||
|
|
||||||
|
|
||||||
version 2016.07.09
|
version 2016.07.09
|
||||||
|
@ -5048,7 +5492,7 @@ Fixed/improved extractors
|
||||||
- kaltura (#5557)
|
- kaltura (#5557)
|
||||||
- la7
|
- la7
|
||||||
- Changed features
|
- Changed features
|
||||||
- Rename --cn-verfication-proxy to --geo-verification-proxy
|
- Rename --cn-verification-proxy to --geo-verification-proxy
|
||||||
Miscellaneous
|
Miscellaneous
|
||||||
- Add script for displaying downloads statistics
|
- Add script for displaying downloads statistics
|
||||||
|
|
||||||
|
|
12
README.md
12
README.md
|
@ -1,4 +1,4 @@
|
||||||
[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl)
|
[![Build Status](https://travis-ci.com/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.com/ytdl-org/youtube-dl)
|
||||||
|
|
||||||
youtube-dl - download videos from youtube.com or other video platforms
|
youtube-dl - download videos from youtube.com or other video platforms
|
||||||
|
|
||||||
|
@ -434,9 +434,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||||
either the path to the binary or its
|
either the path to the binary or its
|
||||||
containing directory.
|
containing directory.
|
||||||
--exec CMD Execute a command on the file after
|
--exec CMD Execute a command on the file after
|
||||||
downloading, similar to find's -exec
|
downloading and post-processing, similar to
|
||||||
syntax. Example: --exec 'adb push {}
|
find's -exec syntax. Example: --exec 'adb
|
||||||
/sdcard/Music/ && rm {}'
|
push {} /sdcard/Music/ && rm {}'
|
||||||
--convert-subs FORMAT Convert the subtitles to other format
|
--convert-subs FORMAT Convert the subtitles to other format
|
||||||
(currently supported: srt|ass|vtt|lrc)
|
(currently supported: srt|ass|vtt|lrc)
|
||||||
|
|
||||||
|
@ -545,7 +545,7 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||||
- `extractor` (string): Name of the extractor
|
- `extractor` (string): Name of the extractor
|
||||||
- `extractor_key` (string): Key name of the extractor
|
- `extractor_key` (string): Key name of the extractor
|
||||||
- `epoch` (numeric): Unix epoch when creating the file
|
- `epoch` (numeric): Unix epoch when creating the file
|
||||||
- `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
|
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
|
||||||
- `playlist` (string): Name or id of the playlist that contains the video
|
- `playlist` (string): Name or id of the playlist that contains the video
|
||||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||||
- `playlist_id` (string): Playlist identifier
|
- `playlist_id` (string): Playlist identifier
|
||||||
|
@ -1032,7 +1032,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
|
||||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
# find the correct sorting and add the required base classes so that sublcasses
|
# find the correct sorting and add the required base classes so that subclasses
|
||||||
# can be correctly created
|
# can be correctly created
|
||||||
classes = _ALL_CLASSES[:-1]
|
classes = _ALL_CLASSES[:-1]
|
||||||
ordered_cls = []
|
ordered_cls = []
|
||||||
|
|
|
@ -35,12 +35,15 @@
|
||||||
- **adobetv:video**
|
- **adobetv:video**
|
||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||||
|
- **aenetworks:collection**
|
||||||
|
- **aenetworks:show**
|
||||||
- **afreecatv**: afreecatv.com
|
- **afreecatv**: afreecatv.com
|
||||||
- **AirMozilla**
|
- **AirMozilla**
|
||||||
- **AliExpressLive**
|
- **AliExpressLive**
|
||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
|
- **Amara**
|
||||||
- **AMCNetworks**
|
- **AMCNetworks**
|
||||||
- **AmericasTestKitchen**
|
- **AmericasTestKitchen**
|
||||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
|
@ -58,9 +61,10 @@
|
||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
- **ARDBetaMediathek**
|
- **ARDBetaMediathek**
|
||||||
- **Arkena**
|
- **Arkena**
|
||||||
- **arte.tv:+7**
|
- **arte.sky.it**
|
||||||
- **arte.tv:embed**
|
- **ArteTV**
|
||||||
- **arte.tv:playlist**
|
- **ArteTVEmbed**
|
||||||
|
- **ArteTVPlaylist**
|
||||||
- **AsianCrush**
|
- **AsianCrush**
|
||||||
- **AsianCrushPlaylist**
|
- **AsianCrushPlaylist**
|
||||||
- **AtresPlayer**
|
- **AtresPlayer**
|
||||||
|
@ -109,6 +113,7 @@
|
||||||
- **Bloomberg**
|
- **Bloomberg**
|
||||||
- **BokeCC**
|
- **BokeCC**
|
||||||
- **BostonGlobe**
|
- **BostonGlobe**
|
||||||
|
- **Box**
|
||||||
- **Bpb**: Bundeszentrale für politische Bildung
|
- **Bpb**: Bundeszentrale für politische Bildung
|
||||||
- **BR**: Bayerischer Rundfunk
|
- **BR**: Bayerischer Rundfunk
|
||||||
- **BravoTV**
|
- **BravoTV**
|
||||||
|
@ -156,6 +161,7 @@
|
||||||
- **Chilloutzone**
|
- **Chilloutzone**
|
||||||
- **chirbit**
|
- **chirbit**
|
||||||
- **chirbit:profile**
|
- **chirbit:profile**
|
||||||
|
- **cielotv.it**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
- **Cinemax**
|
- **Cinemax**
|
||||||
- **CiscoLiveSearch**
|
- **CiscoLiveSearch**
|
||||||
|
@ -304,6 +310,7 @@
|
||||||
- **FrontendMasters**
|
- **FrontendMasters**
|
||||||
- **FrontendMastersCourse**
|
- **FrontendMastersCourse**
|
||||||
- **FrontendMastersLesson**
|
- **FrontendMastersLesson**
|
||||||
|
- **FujiTVFODPlus7**
|
||||||
- **Funimation**
|
- **Funimation**
|
||||||
- **Funk**
|
- **Funk**
|
||||||
- **Fusion**
|
- **Fusion**
|
||||||
|
@ -417,6 +424,8 @@
|
||||||
- **la7.it**
|
- **la7.it**
|
||||||
- **laola1tv**
|
- **laola1tv**
|
||||||
- **laola1tv:embed**
|
- **laola1tv:embed**
|
||||||
|
- **lbry**
|
||||||
|
- **lbry:channel**
|
||||||
- **LCI**
|
- **LCI**
|
||||||
- **Lcp**
|
- **Lcp**
|
||||||
- **LcpPlay**
|
- **LcpPlay**
|
||||||
|
@ -466,6 +475,7 @@
|
||||||
- **massengeschmack.tv**
|
- **massengeschmack.tv**
|
||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
|
- **MedalTV**
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
- **media.ccc.de:lists**
|
- **media.ccc.de:lists**
|
||||||
- **Medialaan**
|
- **Medialaan**
|
||||||
|
@ -480,6 +490,7 @@
|
||||||
- **META**
|
- **META**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
- **Metacritic**
|
- **Metacritic**
|
||||||
|
- **mewatch**
|
||||||
- **Mgoon**
|
- **Mgoon**
|
||||||
- **MGTV**: 芒果TV
|
- **MGTV**: 芒果TV
|
||||||
- **MiaoPai**
|
- **MiaoPai**
|
||||||
|
@ -497,6 +508,7 @@
|
||||||
- **MNetTV**
|
- **MNetTV**
|
||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
- **Mofosex**
|
- **Mofosex**
|
||||||
|
- **MofosexEmbed**
|
||||||
- **Mojvideo**
|
- **Mojvideo**
|
||||||
- **Morningstar**: morningstar.com
|
- **Morningstar**: morningstar.com
|
||||||
- **Motherless**
|
- **Motherless**
|
||||||
|
@ -605,6 +617,7 @@
|
||||||
- **Nuvid**
|
- **Nuvid**
|
||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
- **NYTimesArticle**
|
- **NYTimesArticle**
|
||||||
|
- **NYTimesCooking**
|
||||||
- **NZZ**
|
- **NZZ**
|
||||||
- **ocw.mit.edu**
|
- **ocw.mit.edu**
|
||||||
- **OdaTV**
|
- **OdaTV**
|
||||||
|
@ -619,11 +632,21 @@
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
- **OoyalaExternal**
|
- **OoyalaExternal**
|
||||||
- **OraTV**
|
- **OraTV**
|
||||||
|
- **orf:burgenland**: Radio Burgenland
|
||||||
- **orf:fm4**: radio FM4
|
- **orf:fm4**: radio FM4
|
||||||
- **orf:fm4:story**: fm4.orf.at stories
|
- **orf:fm4:story**: fm4.orf.at stories
|
||||||
- **orf:iptv**: iptv.ORF.at
|
- **orf:iptv**: iptv.ORF.at
|
||||||
|
- **orf:kaernten**: Radio Kärnten
|
||||||
|
- **orf:noe**: Radio Niederösterreich
|
||||||
|
- **orf:oberoesterreich**: Radio Oberösterreich
|
||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
|
- **orf:oe3**: Radio Österreich 3
|
||||||
|
- **orf:salzburg**: Radio Salzburg
|
||||||
|
- **orf:steiermark**: Radio Steiermark
|
||||||
|
- **orf:tirol**: Radio Tirol
|
||||||
- **orf:tvthek**: ORF TVthek
|
- **orf:tvthek**: ORF TVthek
|
||||||
|
- **orf:vorarlberg**: Radio Vorarlberg
|
||||||
|
- **orf:wien**: Radio Wien
|
||||||
- **OsnatelTV**
|
- **OsnatelTV**
|
||||||
- **OutsideTV**
|
- **OutsideTV**
|
||||||
- **PacktPub**
|
- **PacktPub**
|
||||||
|
@ -647,10 +670,13 @@
|
||||||
- **PicartoVod**
|
- **PicartoVod**
|
||||||
- **Piksel**
|
- **Piksel**
|
||||||
- **Pinkbike**
|
- **Pinkbike**
|
||||||
|
- **Pinterest**
|
||||||
|
- **PinterestCollection**
|
||||||
- **Pladform**
|
- **Pladform**
|
||||||
- **Platzi**
|
- **Platzi**
|
||||||
- **PlatziCourse**
|
- **PlatziCourse**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
|
- **player.sky.it**
|
||||||
- **PlayPlusTV**
|
- **PlayPlusTV**
|
||||||
- **PlaysTV**
|
- **PlaysTV**
|
||||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||||
|
@ -688,6 +714,7 @@
|
||||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||||
- **QuantumTV**
|
- **QuantumTV**
|
||||||
|
- **Qub**
|
||||||
- **Quickline**
|
- **Quickline**
|
||||||
- **QuicklineLive**
|
- **QuicklineLive**
|
||||||
- **R7**
|
- **R7**
|
||||||
|
@ -706,6 +733,8 @@
|
||||||
- **RayWenderlichCourse**
|
- **RayWenderlichCourse**
|
||||||
- **RBMARadio**
|
- **RBMARadio**
|
||||||
- **RDS**: RDS.ca
|
- **RDS**: RDS.ca
|
||||||
|
- **RedBull**
|
||||||
|
- **RedBullEmbed**
|
||||||
- **RedBullTV**
|
- **RedBullTV**
|
||||||
- **RedBullTVRrnContent**
|
- **RedBullTVRrnContent**
|
||||||
- **Reddit**
|
- **Reddit**
|
||||||
|
@ -740,6 +769,7 @@
|
||||||
- **RTVNH**
|
- **RTVNH**
|
||||||
- **RTVS**
|
- **RTVS**
|
||||||
- **RUHD**
|
- **RUHD**
|
||||||
|
- **RumbleEmbed**
|
||||||
- **rutube**: Rutube videos
|
- **rutube**: Rutube videos
|
||||||
- **rutube:channel**: Rutube channels
|
- **rutube:channel**: Rutube channels
|
||||||
- **rutube:embed**: Rutube embedded videos
|
- **rutube:embed**: Rutube embedded videos
|
||||||
|
@ -777,6 +807,8 @@
|
||||||
- **Shared**: shared.sx
|
- **Shared**: shared.sx
|
||||||
- **ShowRoomLive**
|
- **ShowRoomLive**
|
||||||
- **Sina**
|
- **Sina**
|
||||||
|
- **sky.it**
|
||||||
|
- **skyacademy.it**
|
||||||
- **SkylineWebcams**
|
- **SkylineWebcams**
|
||||||
- **SkyNews**
|
- **SkyNews**
|
||||||
- **skynewsarabia:article**
|
- **skynewsarabia:article**
|
||||||
|
@ -810,12 +842,14 @@
|
||||||
- **SpankBangPlaylist**
|
- **SpankBangPlaylist**
|
||||||
- **Spankwire**
|
- **Spankwire**
|
||||||
- **Spiegel**
|
- **Spiegel**
|
||||||
- **Spiegel:Article**: Articles on spiegel.de
|
|
||||||
- **Spiegeltv**
|
|
||||||
- **sport.francetvinfo.fr**
|
- **sport.francetvinfo.fr**
|
||||||
- **Sport5**
|
- **Sport5**
|
||||||
- **SportBox**
|
- **SportBox**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
|
- **Spreaker**
|
||||||
|
- **SpreakerPage**
|
||||||
|
- **SpreakerShow**
|
||||||
|
- **SpreakerShowPage**
|
||||||
- **SpringboardPlatform**
|
- **SpringboardPlatform**
|
||||||
- **Sprout**
|
- **Sprout**
|
||||||
- **sr:mediathek**: Saarländischer Rundfunk
|
- **sr:mediathek**: Saarländischer Rundfunk
|
||||||
|
@ -867,6 +901,7 @@
|
||||||
- **TeleQuebecEmission**
|
- **TeleQuebecEmission**
|
||||||
- **TeleQuebecLive**
|
- **TeleQuebecLive**
|
||||||
- **TeleQuebecSquat**
|
- **TeleQuebecSquat**
|
||||||
|
- **TeleQuebecVideo**
|
||||||
- **TeleTask**
|
- **TeleTask**
|
||||||
- **Telewebion**
|
- **Telewebion**
|
||||||
- **TennisTV**
|
- **TennisTV**
|
||||||
|
@ -884,7 +919,7 @@
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
- **ThisOldHouse**
|
- **ThisOldHouse**
|
||||||
- **TikTok**
|
- **TikTok**
|
||||||
- **TikTokUser**
|
- **TikTokUser** (Currently broken)
|
||||||
- **tinypic**: tinypic.com videos
|
- **tinypic**: tinypic.com videos
|
||||||
- **TMZ**
|
- **TMZ**
|
||||||
- **TMZArticle**
|
- **TMZArticle**
|
||||||
|
@ -917,11 +952,13 @@
|
||||||
- **TV2DKBornholmPlay**
|
- **TV2DKBornholmPlay**
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **TV5MondePlus**: TV5MONDE+
|
- **TV5MondePlus**: TV5MONDE+
|
||||||
|
- **tv8.it**
|
||||||
- **TVA**
|
- **TVA**
|
||||||
- **TVANouvelles**
|
- **TVANouvelles**
|
||||||
- **TVANouvellesArticle**
|
- **TVANouvellesArticle**
|
||||||
- **TVC**
|
- **TVC**
|
||||||
- **TVCArticle**
|
- **TVCArticle**
|
||||||
|
- **TVer**
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvland.com**
|
- **tvland.com**
|
||||||
- **TVN24**
|
- **TVN24**
|
||||||
|
@ -939,16 +976,13 @@
|
||||||
- **TVPlayHome**
|
- **TVPlayHome**
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
- **TwitCasting**
|
- **TwitCasting**
|
||||||
- **twitch:chapter**
|
|
||||||
- **twitch:clips**
|
- **twitch:clips**
|
||||||
- **twitch:profile**
|
|
||||||
- **twitch:stream**
|
- **twitch:stream**
|
||||||
- **twitch:video**
|
|
||||||
- **twitch:videos:all**
|
|
||||||
- **twitch:videos:highlights**
|
|
||||||
- **twitch:videos:past-broadcasts**
|
|
||||||
- **twitch:videos:uploads**
|
|
||||||
- **twitch:vod**
|
- **twitch:vod**
|
||||||
|
- **TwitchCollection**
|
||||||
|
- **TwitchVideos**
|
||||||
|
- **TwitchVideosClips**
|
||||||
|
- **TwitchVideosCollections**
|
||||||
- **twitter**
|
- **twitter**
|
||||||
- **twitter:amplify**
|
- **twitter:amplify**
|
||||||
- **twitter:broadcast**
|
- **twitter:broadcast**
|
||||||
|
@ -991,6 +1025,8 @@
|
||||||
- **Viddler**
|
- **Viddler**
|
||||||
- **Videa**
|
- **Videa**
|
||||||
- **video.google:search**: Google Video search
|
- **video.google:search**: Google Video search
|
||||||
|
- **video.sky.it**
|
||||||
|
- **video.sky.it:live**
|
||||||
- **VideoDetective**
|
- **VideoDetective**
|
||||||
- **videofy.me**
|
- **videofy.me**
|
||||||
- **videomore**
|
- **videomore**
|
||||||
|
@ -1032,7 +1068,7 @@
|
||||||
- **vk:wallpost**
|
- **vk:wallpost**
|
||||||
- **vlive**
|
- **vlive**
|
||||||
- **vlive:channel**
|
- **vlive:channel**
|
||||||
- **vlive:playlist**
|
- **vlive:post**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VODPl**
|
- **VODPl**
|
||||||
- **VODPlatform**
|
- **VODPlatform**
|
||||||
|
@ -1104,6 +1140,8 @@
|
||||||
- **yahoo:japannews**: Yahoo! Japan News
|
- **yahoo:japannews**: Yahoo! Japan News
|
||||||
- **YandexDisk**
|
- **YandexDisk**
|
||||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||||
|
- **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
|
||||||
|
- **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
|
||||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||||
- **YandexVideo**
|
- **YandexVideo**
|
||||||
|
@ -1121,20 +1159,17 @@
|
||||||
- **YourPorn**
|
- **YourPorn**
|
||||||
- **YourUpload**
|
- **YourUpload**
|
||||||
- **youtube**: YouTube.com
|
- **youtube**: YouTube.com
|
||||||
- **youtube:channel**: YouTube.com channels
|
|
||||||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
||||||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||||
- **youtube:live**: YouTube.com live streams
|
|
||||||
- **youtube:playlist**: YouTube.com playlists
|
- **youtube:playlist**: YouTube.com playlists
|
||||||
- **youtube:playlists**: YouTube.com user/channel playlists
|
|
||||||
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
||||||
- **youtube:search**: YouTube.com searches
|
- **youtube:search**: YouTube.com searches
|
||||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||||
- **youtube:search_url**: YouTube.com search URLs
|
|
||||||
- **youtube:show**: YouTube.com (multi-season) shows
|
|
||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
- **youtube:tab**: YouTube.com tab
|
||||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||||
|
- **YoutubeYtBe**
|
||||||
|
- **YoutubeYtUser**
|
||||||
- **Zapiks**
|
- **Zapiks**
|
||||||
- **Zaq1**
|
- **Zaq1**
|
||||||
- **Zattoo**
|
- **Zattoo**
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
"writeinfojson": true,
|
"writeinfojson": true,
|
||||||
"writesubtitles": false,
|
"writesubtitles": false,
|
||||||
"allsubtitles": false,
|
"allsubtitles": false,
|
||||||
"listssubtitles": false,
|
"listsubtitles": false,
|
||||||
"socket_timeout": 20,
|
"socket_timeout": 20,
|
||||||
"fixup": "never"
|
"fixup": "never"
|
||||||
}
|
}
|
||||||
|
|
|
@ -108,6 +108,18 @@ class TestInfoExtractor(unittest.TestCase):
|
||||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||||
|
|
||||||
def test_parse_html5_media_entries(self):
|
def test_parse_html5_media_entries(self):
|
||||||
|
# inline video tag
|
||||||
|
expect_dict(
|
||||||
|
self,
|
||||||
|
self.ie._parse_html5_media_entries(
|
||||||
|
'https://127.0.0.1/video.html',
|
||||||
|
r'<html><video src="/vid.mp4" /></html>', None)[0],
|
||||||
|
{
|
||||||
|
'formats': [{
|
||||||
|
'url': 'https://127.0.0.1/vid.mp4',
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
|
||||||
# from https://www.r18.com/
|
# from https://www.r18.com/
|
||||||
# with kpbs in label
|
# with kpbs in label
|
||||||
expect_dict(
|
expect_dict(
|
||||||
|
|
|
@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
self.assertEqual(downloaded['extractor'], 'testex')
|
self.assertEqual(downloaded['extractor'], 'testex')
|
||||||
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||||
|
|
||||||
|
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||||
|
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
||||||
|
|
||||||
|
class _YDL(YDL):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(_YDL, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def trouble(self, s, tb=None):
|
||||||
|
pass
|
||||||
|
|
||||||
|
ydl = _YDL({
|
||||||
|
'format': 'extra',
|
||||||
|
'ignoreerrors': True,
|
||||||
|
})
|
||||||
|
|
||||||
|
class VideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'video:(?P<id>\d+)'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'default',
|
||||||
|
'url': 'url:',
|
||||||
|
}]
|
||||||
|
if video_id == '0':
|
||||||
|
raise ExtractorError('foo')
|
||||||
|
if video_id == '2':
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'extra',
|
||||||
|
'url': TEST_URL,
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': 'Video %s' % video_id,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
class PlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'playlist:'
|
||||||
|
|
||||||
|
def _entries(self):
|
||||||
|
for n in range(3):
|
||||||
|
video_id = compat_str(n)
|
||||||
|
yield {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': VideoIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'url': 'video:%s' % video_id,
|
||||||
|
'title': 'Video Transparent %s' % video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self.playlist_result(self._entries())
|
||||||
|
|
||||||
|
ydl.add_info_extractor(VideoIE(ydl))
|
||||||
|
ydl.add_info_extractor(PlaylistIE(ydl))
|
||||||
|
info = ydl.extract_info('playlist:')
|
||||||
|
entries = info['entries']
|
||||||
|
self.assertEqual(len(entries), 3)
|
||||||
|
self.assertTrue(entries[0] is None)
|
||||||
|
self.assertTrue(entries[1] is None)
|
||||||
|
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(entries[2], downloaded)
|
||||||
|
self.assertEqual(downloaded['url'], TEST_URL)
|
||||||
|
self.assertEqual(downloaded['title'], 'Video Transparent 2')
|
||||||
|
self.assertEqual(downloaded['id'], '2')
|
||||||
|
self.assertEqual(downloaded['extractor'], 'Video')
|
||||||
|
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||||
assert_cookie_has_value('HTTPONLY_COOKIE')
|
assert_cookie_has_value('HTTPONLY_COOKIE')
|
||||||
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
||||||
|
|
||||||
|
def test_malformed_cookies(self):
|
||||||
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
# Cookies should be empty since all malformed cookie file entries
|
||||||
|
# will be ignored
|
||||||
|
self.assertFalse(cookiejar._cookies)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -31,16 +31,17 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||||
|
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||||
assertPlaylist('PL63F0C78739B09958')
|
assertPlaylist('PL63F0C78739B09958')
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||||
# Top tracks
|
# Top tracks
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||||
|
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||||
|
@ -51,26 +52,23 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
||||||
|
|
||||||
def test_youtube_channel_matching(self):
|
def test_youtube_channel_matching(self):
|
||||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
|
|
||||||
def test_youtube_user_matching(self):
|
# def test_youtube_user_matching(self):
|
||||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||||
|
|
||||||
def test_youtube_feeds(self):
|
def test_youtube_feeds(self):
|
||||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
|
||||||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
|
||||||
|
|
||||||
def test_youtube_show_matching(self):
|
# def test_youtube_search_matching(self):
|
||||||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
|
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
def test_youtube_search_matching(self):
|
|
||||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
|
||||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
|
|
|
@ -803,6 +803,8 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
||||||
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
||||||
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
||||||
|
self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
|
||||||
|
self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
|
||||||
|
|
||||||
def test_month_by_name(self):
|
def test_month_by_name(self):
|
||||||
self.assertEqual(month_by_name(None), None)
|
self.assertEqual(month_by_name(None), None)
|
||||||
|
@ -935,6 +937,28 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(d['x'], 1)
|
self.assertEqual(d['x'], 1)
|
||||||
self.assertEqual(d['y'], 'a')
|
self.assertEqual(d['y'], 'a')
|
||||||
|
|
||||||
|
# Just drop ! prefix for now though this results in a wrong value
|
||||||
|
on = js_to_json('''{
|
||||||
|
a: !0,
|
||||||
|
b: !1,
|
||||||
|
c: !!0,
|
||||||
|
d: !!42.42,
|
||||||
|
e: !!![],
|
||||||
|
f: !"abc",
|
||||||
|
g: !"",
|
||||||
|
!42: 42
|
||||||
|
}''')
|
||||||
|
self.assertEqual(json.loads(on), {
|
||||||
|
'a': 0,
|
||||||
|
'b': 1,
|
||||||
|
'c': 0,
|
||||||
|
'd': 42.42,
|
||||||
|
'e': [],
|
||||||
|
'f': "abc",
|
||||||
|
'g': "",
|
||||||
|
'42': 42
|
||||||
|
})
|
||||||
|
|
||||||
on = js_to_json('["abc", "def",]')
|
on = js_to_json('["abc", "def",]')
|
||||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||||
|
|
||||||
|
@ -992,6 +1016,12 @@ class TestUtil(unittest.TestCase):
|
||||||
on = js_to_json('{42:4.2e1}')
|
on = js_to_json('{42:4.2e1}')
|
||||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||||
|
|
||||||
|
on = js_to_json('{ "0x40": "0x40" }')
|
||||||
|
self.assertEqual(json.loads(on), {'0x40': '0x40'})
|
||||||
|
|
||||||
|
on = js_to_json('{ "040": "040" }')
|
||||||
|
self.assertEqual(json.loads(on), {'040': '040'})
|
||||||
|
|
||||||
def test_js_to_json_malformed(self):
|
def test_js_to_json_malformed(self):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
|
@ -267,7 +267,7 @@ class TestYoutubeChapters(unittest.TestCase):
|
||||||
for description, duration, expected_chapters in self._TEST_CASES:
|
for description, duration, expected_chapters in self._TEST_CASES:
|
||||||
ie = YoutubeIE()
|
ie = YoutubeIE()
|
||||||
expect_value(
|
expect_value(
|
||||||
self, ie._extract_chapters(description, duration),
|
self, ie._extract_chapters_from_description(description, duration),
|
||||||
expected_chapters, None)
|
expected_chapters, None)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -74,6 +74,28 @@ _TESTS = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class TestPlayerInfo(unittest.TestCase):
|
||||||
|
def test_youtube_extract_player_info(self):
|
||||||
|
PLAYER_URLS = (
|
||||||
|
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
|
||||||
|
# obsolete
|
||||||
|
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
|
||||||
|
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
|
||||||
|
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
|
||||||
|
('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
|
||||||
|
('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
|
||||||
|
)
|
||||||
|
for player_url, expected_player_id in PLAYER_URLS:
|
||||||
|
expected_player_type = player_url.split('.')[-1]
|
||||||
|
player_type, player_id = YoutubeIE._extract_player_info(player_url)
|
||||||
|
self.assertEqual(player_type, expected_player_type)
|
||||||
|
self.assertEqual(player_id, expected_player_id)
|
||||||
|
|
||||||
|
|
||||||
class TestSignature(unittest.TestCase):
|
class TestSignature(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# Netscape HTTP Cookie File
|
||||||
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
# Cookie file entry with invalid number of fields - 6 instead of 7
|
||||||
|
www.foobar.foobar FALSE / FALSE 0 COOKIE
|
||||||
|
|
||||||
|
# Cookie file entry with invalid expires at
|
||||||
|
www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
|
|
@ -793,21 +793,14 @@ class YoutubeDL(object):
|
||||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||||
'and will probably not work.')
|
'and will probably not work.')
|
||||||
|
|
||||||
|
return self.__extract_info(url, ie, download, extra_info, process)
|
||||||
|
else:
|
||||||
|
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||||
|
|
||||||
|
def __handle_extraction_exceptions(func):
|
||||||
|
def wrapper(self, *args, **kwargs):
|
||||||
try:
|
try:
|
||||||
ie_result = ie.extract(url)
|
return func(self, *args, **kwargs)
|
||||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
|
||||||
break
|
|
||||||
if isinstance(ie_result, list):
|
|
||||||
# Backwards compatibility: old IE result format
|
|
||||||
ie_result = {
|
|
||||||
'_type': 'compat_list',
|
|
||||||
'entries': ie_result,
|
|
||||||
}
|
|
||||||
self.add_default_extra_info(ie_result, ie, url)
|
|
||||||
if process:
|
|
||||||
return self.process_ie_result(ie_result, download, extra_info)
|
|
||||||
else:
|
|
||||||
return ie_result
|
|
||||||
except GeoRestrictedError as e:
|
except GeoRestrictedError as e:
|
||||||
msg = e.msg
|
msg = e.msg
|
||||||
if e.countries:
|
if e.countries:
|
||||||
|
@ -815,20 +808,33 @@ class YoutubeDL(object):
|
||||||
map(ISO3166Utils.short2full, e.countries))
|
map(ISO3166Utils.short2full, e.countries))
|
||||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||||
self.report_error(msg)
|
self.report_error(msg)
|
||||||
break
|
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
self.report_error(compat_str(e), e.format_traceback())
|
||||||
break
|
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.params.get('ignoreerrors', False):
|
if self.params.get('ignoreerrors', False):
|
||||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
@__handle_extraction_exceptions
|
||||||
|
def __extract_info(self, url, ie, download, extra_info, process):
|
||||||
|
ie_result = ie.extract(url)
|
||||||
|
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||||
|
return
|
||||||
|
if isinstance(ie_result, list):
|
||||||
|
# Backwards compatibility: old IE result format
|
||||||
|
ie_result = {
|
||||||
|
'_type': 'compat_list',
|
||||||
|
'entries': ie_result,
|
||||||
|
}
|
||||||
|
self.add_default_extra_info(ie_result, ie, url)
|
||||||
|
if process:
|
||||||
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
return ie_result
|
||||||
|
|
||||||
def add_default_extra_info(self, ie_result, ie, url):
|
def add_default_extra_info(self, ie_result, ie, url):
|
||||||
self.add_extra_info(ie_result, {
|
self.add_extra_info(ie_result, {
|
||||||
|
@ -1003,9 +1009,8 @@ class YoutubeDL(object):
|
||||||
self.to_screen('[download] ' + reason)
|
self.to_screen('[download] ' + reason)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||||
download=download,
|
# TODO: skip failed (empty) entries?
|
||||||
extra_info=extra)
|
|
||||||
playlist_results.append(entry_result)
|
playlist_results.append(entry_result)
|
||||||
ie_result['entries'] = playlist_results
|
ie_result['entries'] = playlist_results
|
||||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||||
|
@ -1034,6 +1039,11 @@ class YoutubeDL(object):
|
||||||
else:
|
else:
|
||||||
raise Exception('Invalid result type: %s' % result_type)
|
raise Exception('Invalid result type: %s' % result_type)
|
||||||
|
|
||||||
|
@__handle_extraction_exceptions
|
||||||
|
def __process_iterable_entry(self, entry, download, extra_info):
|
||||||
|
return self.process_ie_result(
|
||||||
|
entry, download=download, extra_info=extra_info)
|
||||||
|
|
||||||
def _build_format_filter(self, filter_spec):
|
def _build_format_filter(self, filter_spec):
|
||||||
" Returns a function to filter the formats according to the filter_spec "
|
" Returns a function to filter the formats according to the filter_spec "
|
||||||
|
|
||||||
|
@ -1600,7 +1610,7 @@ class YoutubeDL(object):
|
||||||
if req_format is None:
|
if req_format is None:
|
||||||
req_format = self._default_format_spec(info_dict, download=download)
|
req_format = self._default_format_spec(info_dict, download=download)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_stdout('[debug] Default format spec: %s' % req_format)
|
self._write_string('[debug] Default format spec: %s\n' % req_format)
|
||||||
|
|
||||||
format_selector = self.build_format_selector(req_format)
|
format_selector = self.build_format_selector(req_format)
|
||||||
|
|
||||||
|
@ -1861,7 +1871,7 @@ class YoutubeDL(object):
|
||||||
for ph in self._progress_hooks:
|
for ph in self._progress_hooks:
|
||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
||||||
return fd.download(name, info)
|
return fd.download(name, info)
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
|
|
|
@ -57,6 +57,17 @@ try:
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import cookielib as compat_cookiejar
|
import cookielib as compat_cookiejar
|
||||||
|
|
||||||
|
if sys.version_info[0] == 2:
|
||||||
|
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
||||||
|
def __init__(self, version, name, value, *args, **kwargs):
|
||||||
|
if isinstance(name, compat_str):
|
||||||
|
name = name.encode()
|
||||||
|
if isinstance(value, compat_str):
|
||||||
|
value = value.encode()
|
||||||
|
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
||||||
|
else:
|
||||||
|
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.cookies as compat_cookies
|
import http.cookies as compat_cookies
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
|
@ -2334,7 +2345,7 @@ except ImportError: # Python <3.4
|
||||||
|
|
||||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||||
# and uniform cross-version exceptiong handling
|
# and uniform cross-version exception handling
|
||||||
class compat_HTMLParseError(Exception):
|
class compat_HTMLParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -2987,6 +2998,7 @@ __all__ = [
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
|
'compat_cookiejar_Cookie',
|
||||||
'compat_cookies',
|
'compat_cookies',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
'compat_etree_Element',
|
'compat_etree_Element',
|
||||||
|
|
|
@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):
|
||||||
|
|
||||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
||||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||||
success = ctx['dl'].download(fragment_filename, {
|
fragment_info_dict = {
|
||||||
'url': frag_url,
|
'url': frag_url,
|
||||||
'http_headers': headers or info_dict.get('http_headers'),
|
'http_headers': headers or info_dict.get('http_headers'),
|
||||||
})
|
}
|
||||||
|
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||||
if not success:
|
if not success:
|
||||||
return False, None
|
return False, None
|
||||||
|
if fragment_info_dict.get('filetime'):
|
||||||
|
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
|
||||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||||
frag_content = down.read()
|
frag_content = down.read()
|
||||||
|
@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
|
||||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||||
else:
|
else:
|
||||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||||
|
if self.params.get('updatetime', True):
|
||||||
|
filetime = ctx.get('fragment_filetime')
|
||||||
|
if filetime:
|
||||||
|
try:
|
||||||
|
os.utime(ctx['filename'], (time.time(), filetime))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
|
|
@ -141,7 +141,7 @@ class HlsFD(FragmentFD):
|
||||||
count = 0
|
count = 0
|
||||||
headers = info_dict.get('http_headers', {})
|
headers = info_dict.get('http_headers', {})
|
||||||
if byte_range:
|
if byte_range:
|
||||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success, frag_content = self._download_fragment(
|
success, frag_content = self._download_fragment(
|
||||||
|
|
|
@ -106,7 +106,14 @@ class HttpFD(FileDownloader):
|
||||||
set_range(request, range_start, range_end)
|
set_range(request, range_start, range_end)
|
||||||
# Establish connection
|
# Establish connection
|
||||||
try:
|
try:
|
||||||
ctx.data = self.ydl.urlopen(request)
|
try:
|
||||||
|
ctx.data = self.ydl.urlopen(request)
|
||||||
|
except (compat_urllib_error.URLError, ) as err:
|
||||||
|
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||||
|
reason = getattr(err, 'reason', None)
|
||||||
|
if isinstance(reason, socket.timeout):
|
||||||
|
raise RetryDownload(err)
|
||||||
|
raise err
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||||
# that don't support resuming and serve a whole file with no Content-Range
|
# that don't support resuming and serve a whole file with no Content-Range
|
||||||
|
@ -218,24 +225,27 @@ class HttpFD(FileDownloader):
|
||||||
|
|
||||||
def retry(e):
|
def retry(e):
|
||||||
to_stdout = ctx.tmpfilename == '-'
|
to_stdout = ctx.tmpfilename == '-'
|
||||||
if not to_stdout:
|
if ctx.stream is not None:
|
||||||
ctx.stream.close()
|
if not to_stdout:
|
||||||
ctx.stream = None
|
ctx.stream.close()
|
||||||
|
ctx.stream = None
|
||||||
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
raise RetryDownload(e)
|
raise RetryDownload(e)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# Download and write
|
# Download and write
|
||||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
|
||||||
# socket.timeout is a subclass of socket.error but may not have
|
# socket.timeout is a subclass of socket.error but may not have
|
||||||
# errno set
|
# errno set
|
||||||
except socket.timeout as e:
|
except socket.timeout as e:
|
||||||
retry(e)
|
retry(e)
|
||||||
except socket.error as e:
|
except socket.error as e:
|
||||||
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
# SSLError on python 2 (inherits socket.error) may have
|
||||||
raise
|
# no errno set but this error message
|
||||||
retry(e)
|
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
|
||||||
|
retry(e)
|
||||||
|
raise
|
||||||
|
|
||||||
byte_counter += len(data_block)
|
byte_counter += len(data_block)
|
||||||
|
|
||||||
|
@ -299,7 +309,7 @@ class HttpFD(FileDownloader):
|
||||||
'elapsed': now - ctx.start_time,
|
'elapsed': now - ctx.start_time,
|
||||||
})
|
})
|
||||||
|
|
||||||
if is_test and byte_counter == data_len:
|
if data_len is not None and byte_counter == data_len:
|
||||||
break
|
break
|
||||||
|
|
||||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||||
|
|
|
@ -5,20 +5,30 @@ import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
extract_attributes,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
smuggle_url,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
urlencode_postdata,
|
||||||
from ..compat import (
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AENetworksBaseIE(ThePlatformIE):
|
class AENetworksBaseIE(ThePlatformIE):
|
||||||
|
_BASE_URL_REGEX = r'''(?x)https?://
|
||||||
|
(?:(?:www|play|watch)\.)?
|
||||||
|
(?P<domain>
|
||||||
|
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||||
|
fyi\.tv
|
||||||
|
)/'''
|
||||||
_THEPLATFORM_KEY = 'crazyjava'
|
_THEPLATFORM_KEY = 'crazyjava'
|
||||||
_THEPLATFORM_SECRET = 's3cr3t'
|
_THEPLATFORM_SECRET = 's3cr3t'
|
||||||
|
_DOMAIN_MAP = {
|
||||||
|
'history.com': ('HISTORY', 'history'),
|
||||||
|
'aetv.com': ('AETV', 'aetv'),
|
||||||
|
'mylifetime.com': ('LIFETIME', 'lifetime'),
|
||||||
|
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||||
|
'fyi.tv': ('FYI', 'fyi'),
|
||||||
|
'historyvault.com': (None, 'historyvault'),
|
||||||
|
}
|
||||||
|
|
||||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||||
query = {'mbr': 'true'}
|
query = {'mbr': 'true'}
|
||||||
|
@ -31,7 +41,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||||
'assetTypes': 'high_video_s3'
|
'assetTypes': 'high_video_s3'
|
||||||
}, {
|
}, {
|
||||||
'assetTypes': 'high_video_s3',
|
'assetTypes': 'high_video_s3',
|
||||||
'switch': 'hls_ingest_fastly'
|
'switch': 'hls_high_fastly',
|
||||||
}]
|
}]
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -61,20 +71,13 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||||
class AENetworksIE(AENetworksBaseIE):
|
class AENetworksIE(AENetworksBaseIE):
|
||||||
IE_NAME = 'aenetworks'
|
IE_NAME = 'aenetworks'
|
||||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
|
||||||
https?://
|
shows/[^/]+/season-\d+/episode-\d+|
|
||||||
(?:www\.)?
|
(?:
|
||||||
(?P<domain>
|
(?:movie|special)s/[^/]+|
|
||||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
(?:shows/[^/]+/)?videos
|
||||||
fyi\.tv
|
)/[^/?#&]+
|
||||||
)/
|
)'''
|
||||||
(?:
|
|
||||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
|
||||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
|
||||||
specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
|
|
||||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
|
||||||
)
|
|
||||||
'''
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -91,22 +94,23 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
}, {
|
'skip': 'This video is only available for users of participating TV providers.',
|
||||||
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '71889446852',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 5,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'SERIES4317',
|
|
||||||
'title': 'Atlanta Plastic',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 2,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||||
'only_matching': True
|
'info_dict': {
|
||||||
|
'id': '600587331957',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Inlawful Entry',
|
||||||
|
'description': 'md5:57c12115a2b384d883fe64ca50529e08',
|
||||||
|
'timestamp': 1452634428,
|
||||||
|
'upload_date': '20160112',
|
||||||
|
'uploader': 'AENE-NEW',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
@ -117,80 +121,152 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}, {
|
|
||||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
|
||||||
'only_matching': True
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
_DOMAIN_TO_REQUESTOR_ID = {
|
|
||||||
'history.com': 'HISTORY',
|
|
||||||
'aetv.com': 'AETV',
|
|
||||||
'mylifetime.com': 'LIFETIME',
|
|
||||||
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
|
||||||
'fyi.tv': 'FYI',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
domain, canonical = re.match(self._VALID_URL, url).groups()
|
||||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
result = self._download_json(
|
||||||
if show_path:
|
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||||
url_parts = show_path.split('/')
|
canonical, query={'filter[canonical]': '/' + canonical})['results'][0]
|
||||||
url_parts_len = len(url_parts)
|
title = result['title']
|
||||||
if url_parts_len == 1:
|
video_id = result['id']
|
||||||
entries = []
|
media_url = result['publicUrl']
|
||||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
|
||||||
entries.append(self.url_result(
|
|
||||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
|
||||||
if entries:
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
|
||||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
|
||||||
else:
|
|
||||||
# single season
|
|
||||||
url_parts_len = 2
|
|
||||||
if url_parts_len == 2:
|
|
||||||
entries = []
|
|
||||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
|
||||||
episode_attributes = extract_attributes(episode_item)
|
|
||||||
episode_url = compat_urlparse.urljoin(
|
|
||||||
url, episode_attributes['data-canonical'])
|
|
||||||
entries.append(self.url_result(
|
|
||||||
episode_url, 'AENetworks',
|
|
||||||
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
|
||||||
|
|
||||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
|
||||||
media_url = self._search_regex(
|
|
||||||
[r"media_url\s*=\s*'(?P<url>[^']+)'",
|
|
||||||
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
|
|
||||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
|
||||||
webpage, 'video url', group='url')
|
|
||||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
auth = None
|
auth = None
|
||||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
|
||||||
resource = self._get_mvpd_resource(
|
resource = self._get_mvpd_resource(
|
||||||
requestor_id, theplatform_metadata['title'],
|
requestor_id, theplatform_metadata['title'],
|
||||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||||
theplatform_metadata['ratings'][0]['rating'])
|
theplatform_metadata['ratings'][0]['rating'])
|
||||||
auth = self._extract_mvpd_auth(
|
auth = self._extract_mvpd_auth(
|
||||||
url, video_id, requestor_id, resource)
|
url, video_id, requestor_id, resource)
|
||||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
|
||||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||||
|
info.update({
|
||||||
|
'title': title,
|
||||||
|
'series': result.get('seriesName'),
|
||||||
|
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||||
|
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||||
|
})
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||||
|
def _call_api(self, resource, slug, brand, fields):
|
||||||
|
return self._download_json(
|
||||||
|
'https://yoga.appsvcs.aetnd.com/graphql',
|
||||||
|
slug, query={'brand': brand}, data=urlencode_postdata({
|
||||||
|
'query': '''{
|
||||||
|
%s(slug: "%s") {
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}''' % (resource, slug, fields),
|
||||||
|
}))['data'][resource]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
domain, slug = re.match(self._VALID_URL, url).groups()
|
||||||
|
_, brand = self._DOMAIN_MAP[domain]
|
||||||
|
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||||
|
base_url = 'http://watch.%s' % domain
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||||
|
doc = self._get_doc(item)
|
||||||
|
canonical = doc.get('canonical')
|
||||||
|
if not canonical:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(
|
||||||
|
base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
|
||||||
|
|
||||||
|
description = None
|
||||||
|
if self._PLAYLIST_DESCRIPTION_KEY:
|
||||||
|
description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist.get('id'),
|
||||||
|
playlist.get(self._PLAYLIST_TITLE_KEY), description)
|
||||||
|
|
||||||
|
|
||||||
|
class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||||
|
IE_NAME = 'aenetworks:collection'
|
||||||
|
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '282',
|
||||||
|
'title': 'America The Story of Us',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 12,
|
||||||
|
}, {
|
||||||
|
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||||
|
'only_matching': True
|
||||||
|
}]
|
||||||
|
_RESOURCE = 'list'
|
||||||
|
_ITEMS_KEY = 'items'
|
||||||
|
_PLAYLIST_TITLE_KEY = 'display_title'
|
||||||
|
_PLAYLIST_DESCRIPTION_KEY = None
|
||||||
|
_FIELDS = '''id
|
||||||
|
display_title
|
||||||
|
items {
|
||||||
|
... on ListVideoItem {
|
||||||
|
doc {
|
||||||
|
canonical
|
||||||
|
id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
|
||||||
|
def _get_doc(self, item):
|
||||||
|
return item.get('doc') or {}
|
||||||
|
|
||||||
|
|
||||||
|
class AENetworksShowIE(AENetworksListBaseIE):
|
||||||
|
IE_NAME = 'aenetworks:show'
|
||||||
|
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.history.com/shows/ancient-aliens',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'SH012427480000',
|
||||||
|
'title': 'Ancient Aliens',
|
||||||
|
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 168,
|
||||||
|
}]
|
||||||
|
_RESOURCE = 'series'
|
||||||
|
_ITEMS_KEY = 'episodes'
|
||||||
|
_PLAYLIST_TITLE_KEY = 'title'
|
||||||
|
_PLAYLIST_DESCRIPTION_KEY = 'description'
|
||||||
|
_FIELDS = '''description
|
||||||
|
id
|
||||||
|
title
|
||||||
|
episodes {
|
||||||
|
canonical
|
||||||
|
id
|
||||||
|
}'''
|
||||||
|
|
||||||
|
def _get_doc(self, item):
|
||||||
|
return item
|
||||||
|
|
||||||
|
|
||||||
class HistoryTopicIE(AENetworksBaseIE):
|
class HistoryTopicIE(AENetworksBaseIE):
|
||||||
IE_NAME = 'history:topic'
|
IE_NAME = 'history:topic'
|
||||||
IE_DESC = 'History.com Topic'
|
IE_DESC = 'History.com Topic'
|
||||||
|
@ -204,6 +280,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||||
'timestamp': 1375819729,
|
'timestamp': 1375819729,
|
||||||
'upload_date': '20130806',
|
'upload_date': '20130806',
|
||||||
|
'uploader': 'AENE-NEW',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -212,36 +289,8 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def theplatform_url_result(self, theplatform_url, video_id, query):
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
'url': smuggle_url(
|
|
||||||
update_url_query(theplatform_url, query),
|
|
||||||
{
|
|
||||||
'sig': {
|
|
||||||
'key': self._THEPLATFORM_KEY,
|
|
||||||
'secret': self._THEPLATFORM_SECRET,
|
|
||||||
},
|
|
||||||
'force_smil_url': True
|
|
||||||
}),
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
return self.url_result(
|
||||||
video_id = self._search_regex(
|
'http://www.history.com/videos/' + display_id,
|
||||||
r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
|
AENetworksIE.ie_key())
|
||||||
result = self._download_json(
|
|
||||||
'https://feeds.video.aetnd.com/api/v2/history/videos',
|
|
||||||
video_id, query={'filter[id]': video_id})['results'][0]
|
|
||||||
title = result['title']
|
|
||||||
info = self._extract_aen_smil(result['publicUrl'], video_id)
|
|
||||||
info.update({
|
|
||||||
'title': title,
|
|
||||||
'description': result.get('description'),
|
|
||||||
'duration': int_or_none(result.get('duration')),
|
|
||||||
'timestamp': int_or_none(result.get('added'), 1000),
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
|
@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||||
if video_element is None or video_element.text is None:
|
if video_element is None or video_element.text is None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s video does not exist' % video_id, expected=True)
|
'Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
video_url = video_element.text.strip()
|
video_url = video_element.text.strip()
|
||||||
|
|
||||||
|
|
103
youtube_dl/extractor/amara.py
Normal file
103
youtube_dl/extractor/amara.py
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AmaraIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# Youtube
|
||||||
|
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
||||||
|
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'h6ZuVdvYnfE',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Why jury trials are becoming less common',
|
||||||
|
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20160813',
|
||||||
|
'uploader': 'PBS NewsHour',
|
||||||
|
'uploader_id': 'PBSNewsHour',
|
||||||
|
'timestamp': 1549639570,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Vimeo
|
||||||
|
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||||
|
'md5': '99392c75fa05d432a8f11df03612195e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '18622084',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Vimeo at CES 2011!',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'timestamp': 1294763658,
|
||||||
|
'upload_date': '20110111',
|
||||||
|
'uploader': 'Sam Morrill',
|
||||||
|
'uploader_id': 'sammorrill'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Direct Link
|
||||||
|
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||||
|
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's8KL7I3jLmh6',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The danger of a single story',
|
||||||
|
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20091007',
|
||||||
|
'timestamp': 1254942511,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
meta = self._download_json(
|
||||||
|
'https://amara.org/api/videos/%s/' % video_id,
|
||||||
|
video_id, query={'format': 'json'})
|
||||||
|
title = meta['title']
|
||||||
|
video_url = meta['all_urls'][0]
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for language in (meta.get('languages') or []):
|
||||||
|
subtitles_uri = language.get('subtitles_uri')
|
||||||
|
if not (subtitles_uri and language.get('published')):
|
||||||
|
continue
|
||||||
|
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
|
||||||
|
for f in ('json', 'srt', 'vtt'):
|
||||||
|
subtitle.append({
|
||||||
|
'ext': f,
|
||||||
|
'url': update_url_query(subtitles_uri, {'format': f}),
|
||||||
|
})
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'title': title,
|
||||||
|
'description': meta.get('description'),
|
||||||
|
'thumbnail': meta.get('thumbnail'),
|
||||||
|
'duration': int_or_none(meta.get('duration')),
|
||||||
|
'timestamp': parse_iso8601(meta.get('created')),
|
||||||
|
}
|
||||||
|
|
||||||
|
for ie in (YoutubeIE, VimeoIE):
|
||||||
|
if ie.suitable(video_url):
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': ie.ie_key(),
|
||||||
|
})
|
||||||
|
break
|
||||||
|
|
||||||
|
return info
|
|
@ -1,6 +1,8 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -11,25 +13,22 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class AMCNetworksIE(ThePlatformIE):
|
class AMCNetworksIE(ThePlatformIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
|
||||||
'md5': '',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 's3MX01Nl4vPH',
|
'id': '4Lq1dzOnZGt0',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Maron - Season 4 - Step 1',
|
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
|
||||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
|
||||||
'age_limit': 17,
|
'upload_date': '20201120',
|
||||||
'upload_date': '20160505',
|
'timestamp': 1605904350,
|
||||||
'timestamp': 1462468831,
|
|
||||||
'uploader': 'AMCN',
|
'uploader': 'AMCN',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Requires TV provider accounts',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -55,32 +54,33 @@ class AMCNetworksIE(ThePlatformIE):
|
||||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_REQUESTOR_ID_MAP = {
|
||||||
|
'amc': 'AMC',
|
||||||
|
'bbcamerica': 'BBCA',
|
||||||
|
'ifc': 'IFC',
|
||||||
|
'sundancetv': 'SUNDANCE',
|
||||||
|
'wetv': 'WETV',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||||
|
properties = self._download_json(
|
||||||
|
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
|
||||||
|
display_id)['data']['properties']
|
||||||
query = {
|
query = {
|
||||||
'mbr': 'true',
|
'mbr': 'true',
|
||||||
'manifest': 'm3u',
|
'manifest': 'm3u',
|
||||||
}
|
}
|
||||||
media_url = self._search_regex(
|
tp_path = 'M_UwQC/media/' + properties['videoPid']
|
||||||
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||||
webpage, 'media url')
|
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
|
||||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
|
||||||
r'link\.theplatform\.com/s/([^?]+)',
|
|
||||||
media_url, 'theplatform_path'), display_id)
|
|
||||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
video_id = theplatform_metadata['pid']
|
video_id = theplatform_metadata['pid']
|
||||||
title = theplatform_metadata['title']
|
title = theplatform_metadata['title']
|
||||||
rating = try_get(
|
rating = try_get(
|
||||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||||
auth_required = self._search_regex(
|
if properties.get('videoCategory') == 'TVE-Auth':
|
||||||
r'window\.authRequired\s*=\s*(true|false);',
|
|
||||||
webpage, 'auth required')
|
|
||||||
if auth_required == 'true':
|
|
||||||
requestor_id = self._search_regex(
|
|
||||||
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
|
||||||
webpage, 'requestor id')
|
|
||||||
resource = self._get_mvpd_resource(
|
resource = self._get_mvpd_resource(
|
||||||
requestor_id, title, video_id, rating)
|
requestor_id, title, video_id, rating)
|
||||||
query['auth'] = self._extract_mvpd_auth(
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
|
|
|
@ -1,33 +1,33 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
int_or_none,
|
|
||||||
js_to_json,
|
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AmericasTestKitchenIE(InfoExtractor):
|
class AmericasTestKitchenIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5b400b9ee338f922cb06450c',
|
'id': '5b400b9ee338f922cb06450c',
|
||||||
'title': 'Weeknight Japanese Suppers',
|
'title': 'Japanese Suppers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
|
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||||
'thumbnail': r're:^https?://',
|
'thumbnail': r're:^https?://',
|
||||||
'timestamp': 1523664000,
|
'timestamp': 1523664000,
|
||||||
'upload_date': '20180414',
|
'upload_date': '20180414',
|
||||||
'release_date': '20180414',
|
'release_date': '20180410',
|
||||||
'series': "America's Test Kitchen",
|
'series': "America's Test Kitchen",
|
||||||
'season_number': 18,
|
'season_number': 18,
|
||||||
'episode': 'Weeknight Japanese Suppers',
|
'episode': 'Japanese Suppers',
|
||||||
'episode_number': 15,
|
'episode_number': 15,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -36,47 +36,31 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
resource_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
is_episode = resource_type == 'episode'
|
||||||
|
if is_episode:
|
||||||
|
resource_type = 'episodes'
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
resource = self._download_json(
|
||||||
|
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||||
video_data = self._parse_json(
|
video = resource['video'] if is_episode else resource
|
||||||
self._search_regex(
|
episode = resource if is_episode else resource.get('episode') or {}
|
||||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
|
||||||
webpage, 'initial context'),
|
|
||||||
video_id, js_to_json)
|
|
||||||
|
|
||||||
ep_data = try_get(
|
|
||||||
video_data,
|
|
||||||
(lambda x: x['episodeDetail']['content']['data'],
|
|
||||||
lambda x: x['videoDetail']['content']['data']), dict)
|
|
||||||
ep_meta = ep_data.get('full_video', {})
|
|
||||||
|
|
||||||
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
|
|
||||||
|
|
||||||
title = ep_data.get('title') or ep_meta.get('title')
|
|
||||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
|
||||||
'description') or ep_meta.get('description'))
|
|
||||||
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
|
||||||
release_date = unified_strdate(ep_data.get('aired_at'))
|
|
||||||
|
|
||||||
season_number = int_or_none(ep_meta.get('season_number'))
|
|
||||||
episode = ep_meta.get('title')
|
|
||||||
episode_number = int_or_none(ep_meta.get('episode_number'))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
|
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||||
'ie_key': 'Zype',
|
'ie_key': 'Zype',
|
||||||
'title': title,
|
'description': clean_html(video.get('description')),
|
||||||
'description': description,
|
'release_date': unified_strdate(video.get('publishDate')),
|
||||||
'thumbnail': thumbnail,
|
'series': try_get(episode, lambda x: x['show']['title']),
|
||||||
'release_date': release_date,
|
'episode': episode.get('title'),
|
||||||
'series': "America's Test Kitchen",
|
|
||||||
'season_number': season_number,
|
|
||||||
'episode': episode,
|
|
||||||
'episode_number': episode_number,
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -249,7 +249,7 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 14.02.2019
|
# available till 14.02.2019
|
||||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||||
|
@ -263,6 +263,9 @@ class ARDIE(InfoExtractor):
|
||||||
'upload_date': '20180214',
|
'upload_date': '20180214',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -310,9 +313,9 @@ class ARDIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/(?P<client>[^/]+)/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'die-robuste-roswita',
|
'display_id': 'die-robuste-roswita',
|
||||||
|
@ -325,6 +328,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
'upload_date': '20191222',
|
'upload_date': '20191222',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -336,7 +348,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
display_id = mobj.group('display_id') or video_id
|
display_id = mobj.group('display_id')
|
||||||
|
if display_id:
|
||||||
|
display_id = display_id.rstrip('/')
|
||||||
|
if not display_id:
|
||||||
|
display_id = video_id
|
||||||
|
|
||||||
player_page = self._download_json(
|
player_page = self._download_json(
|
||||||
'https://api.ardmediathek.de/public-gateway',
|
'https://api.ardmediathek.de/public-gateway',
|
||||||
|
|
|
@ -4,23 +4,57 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
|
||||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
|
||||||
# add tests.
|
|
||||||
|
|
||||||
|
|
||||||
class ArteTVBaseIE(InfoExtractor):
|
class ArteTVBaseIE(InfoExtractor):
|
||||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
|
||||||
info = self._download_json(json_url, video_id)
|
_API_BASE = 'https://api.arte.tv/api/player/v1'
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVIE(ArteTVBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||||
|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||||
|
)
|
||||||
|
/(?P<id>\d{6}-\d{3}-[AF])
|
||||||
|
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '088501-000-A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Mexico: Stealing Petrol to Survive',
|
||||||
|
'upload_date': '20190628',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
|
||||||
player_info = info['videoJsonPlayer']
|
player_info = info['videoJsonPlayer']
|
||||||
|
|
||||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||||
|
@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
if not upload_date_str:
|
if not upload_date_str:
|
||||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||||
|
|
||||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
title = (player_info.get('VTI') or player_info['VID']).strip()
|
||||||
subtitle = player_info.get('VSU', '').strip()
|
subtitle = player_info.get('VSU', '').strip()
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
info_dict = {
|
|
||||||
'id': player_info['VID'],
|
|
||||||
'title': title,
|
|
||||||
'description': player_info.get('VDE'),
|
|
||||||
'upload_date': unified_strdate(upload_date_str),
|
|
||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
|
||||||
}
|
|
||||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||||
|
|
||||||
LANGS = {
|
LANGS = {
|
||||||
|
@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_dict in vsr.items():
|
for format_id, format_dict in vsr.items():
|
||||||
f = dict(format_dict)
|
f = dict(format_dict)
|
||||||
|
format_url = url_or_none(f.get('url'))
|
||||||
|
streamer = f.get('streamer')
|
||||||
|
if not format_url and not streamer:
|
||||||
|
continue
|
||||||
versionCode = f.get('versionCode')
|
versionCode = f.get('versionCode')
|
||||||
l = re.escape(langcode)
|
l = re.escape(langcode)
|
||||||
|
|
||||||
|
@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
else:
|
else:
|
||||||
lang_pref = -1
|
lang_pref = -1
|
||||||
|
|
||||||
|
media_type = f.get('mediaType')
|
||||||
|
if media_type == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
for m3u8_format in m3u8_formats:
|
||||||
|
m3u8_format['language_preference'] = lang_pref
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
continue
|
||||||
|
|
||||||
format = {
|
format = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||||
|
@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
'quality': qfunc(f.get('quality')),
|
'quality': qfunc(f.get('quality')),
|
||||||
}
|
}
|
||||||
|
|
||||||
if f.get('mediaType') == 'rtmp':
|
if media_type == 'rtmp':
|
||||||
format['url'] = f['streamer']
|
format['url'] = f['streamer']
|
||||||
format['play_path'] = 'mp4:' + f['url']
|
format['play_path'] = 'mp4:' + f['url']
|
||||||
format['ext'] = 'flv'
|
format['ext'] = 'flv'
|
||||||
|
@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
|
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
|
|
||||||
self._check_formats(formats, video_id)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info_dict['formats'] = formats
|
return {
|
||||||
return info_dict
|
'id': player_info.get('VID') or video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': player_info.get('VDE'),
|
||||||
|
'upload_date': unified_strdate(upload_date_str),
|
||||||
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
class ArteTVEmbedIE(InfoExtractor):
|
||||||
IE_NAME = 'arte.tv:+7'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '088501-000-A',
|
'id': '100605-013-A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Mexico: Stealing Petrol to Survive',
|
'title': 'United we Stream November Lockdown Edition #13',
|
||||||
'upload_date': '20190628',
|
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||||
|
'upload_date': '20201116',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
@staticmethod
|
||||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
def _extract_urls(webpage):
|
||||||
return self._extract_from_json_url(
|
return [url for _, url in re.findall(
|
||||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
|
||||||
video_id, lang)
|
webpage)]
|
||||||
|
|
||||||
|
|
||||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
|
||||||
IE_NAME = 'arte.tv:embed'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https://www\.arte\.tv
|
|
||||||
/player/v3/index\.php\?json_url=
|
|
||||||
(?P<json_url>
|
|
||||||
https?://api\.arte\.tv/api/player/v1/config/
|
|
||||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
|
||||||
)
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
json_url = qs['json_url'][0]
|
||||||
|
video_id = ArteTVIE._match_id(json_url)
|
||||||
|
return self.url_result(
|
||||||
|
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
IE_NAME = 'arte.tv:playlist'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
collection = self._download_json(
|
collection = self._download_json(
|
||||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
'%s/collectionData/%s/%s?source=videos'
|
||||||
% (lang, playlist_id), playlist_id)
|
% (self._API_BASE, lang, playlist_id), playlist_id)
|
||||||
|
entries = []
|
||||||
|
for video in collection['videos']:
|
||||||
|
if not isinstance(video, dict):
|
||||||
|
continue
|
||||||
|
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
video_id = video.get('programId')
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'alt_title': video.get('subtitle'),
|
||||||
|
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
|
||||||
|
'duration': int_or_none(video.get('durationSeconds')),
|
||||||
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
'ie_key': ArteTVIE.ie_key(),
|
||||||
|
})
|
||||||
title = collection.get('title')
|
title = collection.get('title')
|
||||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||||
entries = [
|
|
||||||
self._extract_from_json_url(
|
|
||||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
|
||||||
for video in collection['videos'] if video.get('jsonUrl')]
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import random
|
import random
|
||||||
|
@ -5,10 +6,7 @@ import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
@ -17,30 +15,32 @@ from ..utils import (
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1812978515',
|
'id': '1812978515',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
|
||||||
'duration': 9.8485,
|
'duration': 9.8485,
|
||||||
|
'uploader': 'youtube-dl "\'/\\ä↭',
|
||||||
|
'upload_date': '20121129',
|
||||||
|
'timestamp': 1354224127,
|
||||||
},
|
},
|
||||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||||
}, {
|
}, {
|
||||||
# free download
|
# free download
|
||||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2650410135',
|
'id': '2650410135',
|
||||||
'ext': 'aiff',
|
'ext': 'aiff',
|
||||||
|
@ -79,11 +79,16 @@ class BandcampIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
|
||||||
|
return self._parse_json(self._html_search_regex(
|
||||||
|
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
|
||||||
|
attr + ' data', group=2), video_id, fatal=fatal)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
title = self._match_id(url)
|
||||||
title = mobj.group('title')
|
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
tralbum = self._extract_data_attr(webpage, title)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
track_id = None
|
track_id = None
|
||||||
track = None
|
track = None
|
||||||
|
@ -91,10 +96,7 @@ class BandcampIE(InfoExtractor):
|
||||||
duration = None
|
duration = None
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
track_info = self._parse_json(
|
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
|
||||||
self._search_regex(
|
|
||||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
|
||||||
webpage, 'track info', default='{}'), title)
|
|
||||||
if track_info:
|
if track_info:
|
||||||
file_ = track_info.get('file')
|
file_ = track_info.get('file')
|
||||||
if isinstance(file_, dict):
|
if isinstance(file_, dict):
|
||||||
|
@ -111,37 +113,25 @@ class BandcampIE(InfoExtractor):
|
||||||
'abr': int_or_none(abr_str),
|
'abr': int_or_none(abr_str),
|
||||||
})
|
})
|
||||||
track = track_info.get('title')
|
track = track_info.get('title')
|
||||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
track_id = str_or_none(
|
||||||
|
track_info.get('track_id') or track_info.get('id'))
|
||||||
track_number = int_or_none(track_info.get('track_num'))
|
track_number = int_or_none(track_info.get('track_num'))
|
||||||
duration = float_or_none(track_info.get('duration'))
|
duration = float_or_none(track_info.get('duration'))
|
||||||
|
|
||||||
def extract(key):
|
embed = self._extract_data_attr(webpage, title, 'embed', False)
|
||||||
return self._search_regex(
|
current = tralbum.get('current') or {}
|
||||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
|
||||||
webpage, key, default=None, group='value')
|
|
||||||
|
|
||||||
artist = extract('artist')
|
|
||||||
album = extract('album_title')
|
|
||||||
timestamp = unified_timestamp(
|
timestamp = unified_timestamp(
|
||||||
extract('publish_date') or extract('album_publish_date'))
|
current.get('publish_date') or tralbum.get('album_publish_date'))
|
||||||
release_date = unified_strdate(extract('album_release_date'))
|
|
||||||
|
|
||||||
download_link = self._search_regex(
|
download_link = tralbum.get('freeDownloadPage')
|
||||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
|
||||||
'download link', default=None, group='url')
|
|
||||||
if download_link:
|
if download_link:
|
||||||
track_id = self._search_regex(
|
track_id = compat_str(tralbum['id'])
|
||||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
|
||||||
webpage, 'track id')
|
|
||||||
|
|
||||||
download_webpage = self._download_webpage(
|
download_webpage = self._download_webpage(
|
||||||
download_link, track_id, 'Downloading free downloads page')
|
download_link, track_id, 'Downloading free downloads page')
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(download_webpage, track_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
track_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
info = try_get(
|
info = try_get(
|
||||||
blob, (lambda x: x['digital_items'][0],
|
blob, (lambda x: x['digital_items'][0],
|
||||||
|
@ -207,20 +197,20 @@ class BandcampIE(InfoExtractor):
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': artist,
|
'uploader': artist,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'release_date': release_date,
|
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'track': track,
|
'track': track,
|
||||||
'track_number': track_number,
|
'track_number': track_number,
|
||||||
'track_id': track_id,
|
'track_id': track_id,
|
||||||
'artist': artist,
|
'artist': artist,
|
||||||
'album': album,
|
'album': embed.get('album_title'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
@ -230,7 +220,10 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1353101989',
|
'id': '1353101989',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Intro',
|
'title': 'Blazo - Intro',
|
||||||
|
'timestamp': 1311756226,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -238,7 +231,10 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '38097443',
|
'id': '38097443',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
'timestamp': 1311757238,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -274,6 +270,7 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'title': '"Entropy" EP',
|
'title': '"Entropy" EP',
|
||||||
'uploader_id': 'jstrecords',
|
'uploader_id': 'jstrecords',
|
||||||
'id': 'entropy-ep',
|
'id': 'entropy-ep',
|
||||||
|
'description': 'md5:0ff22959c943622972596062f2f366a5',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
}, {
|
}, {
|
||||||
|
@ -283,6 +280,7 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'id': 'we-are-the-plague',
|
'id': 'we-are-the-plague',
|
||||||
'title': 'WE ARE THE PLAGUE',
|
'title': 'WE ARE THE PLAGUE',
|
||||||
'uploader_id': 'insulters',
|
'uploader_id': 'insulters',
|
||||||
|
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
@ -294,41 +292,34 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
else super(BandcampAlbumIE, cls).suitable(url))
|
else super(BandcampAlbumIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
|
||||||
uploader_id = mobj.group('subdomain')
|
|
||||||
album_id = mobj.group('album_id')
|
|
||||||
playlist_id = album_id or uploader_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
track_elements = re.findall(
|
tralbum = self._extract_data_attr(webpage, playlist_id)
|
||||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
track_info = tralbum.get('trackinfo')
|
||||||
if not track_elements:
|
if not track_info:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
# Only tracks with duration info have songs
|
# Only tracks with duration info have songs
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
compat_urlparse.urljoin(url, t_path),
|
urljoin(url, t['title_link']), BandcampIE.ie_key(),
|
||||||
ie=BandcampIE.ie_key(),
|
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
|
||||||
video_title=self._search_regex(
|
for t in track_info
|
||||||
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
if t.get('duration')]
|
||||||
elem_content, 'track title', fatal=False))
|
|
||||||
for elem_content, t_path in track_elements
|
current = tralbum.get('current') or {}
|
||||||
if self._html_search_meta('duration', elem_content, default=None)]
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
|
||||||
webpage, 'title', fatal=False)
|
|
||||||
if title:
|
|
||||||
title = title.replace(r'\"', '"')
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'title': title,
|
'title': current.get('title'),
|
||||||
|
'description': current.get('about'),
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampWeeklyIE(InfoExtractor):
|
class BandcampWeeklyIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:weekly'
|
IE_NAME = 'Bandcamp:weekly'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -343,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
'release_date': '20170404',
|
'release_date': '20170404',
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': 'Magic Moments',
|
'episode': 'Magic Moments',
|
||||||
'episode_number': 208,
|
|
||||||
'episode_id': '224',
|
'episode_id': '224',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'opus-lo',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, show_id)
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(webpage, show_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
video_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
show = blob['bcw_show']
|
show = blob['bcw_data'][show_id]
|
||||||
|
|
||||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
|
||||||
# which happens to expose the latest Bandcamp Weekly episode.
|
|
||||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in show['audio_stream'].items():
|
for format_id, format_url in show['audio_stream'].items():
|
||||||
|
@ -390,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
episode_number = None
|
|
||||||
seq = blob.get('bcw_seq')
|
|
||||||
|
|
||||||
if seq and isinstance(seq, list):
|
|
||||||
try:
|
|
||||||
episode_number = next(
|
|
||||||
int_or_none(e.get('episode_number'))
|
|
||||||
for e in seq
|
|
||||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
|
||||||
except StopIteration:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': show_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': show.get('desc') or show.get('short_desc'),
|
'description': show.get('desc') or show.get('short_desc'),
|
||||||
'duration': float_or_none(show.get('audio_duration')),
|
'duration': float_or_none(show.get('audio_duration')),
|
||||||
|
@ -411,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
'release_date': unified_strdate(show.get('published_date')),
|
'release_date': unified_strdate(show.get('published_date')),
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': show.get('subtitle'),
|
'episode': show.get('subtitle'),
|
||||||
'episode_number': episode_number,
|
'episode_id': show_id,
|
||||||
'episode_id': compat_str(video_id),
|
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
|
@ -528,7 +528,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
|
|
||||||
def get_programme_id(item):
|
def get_programme_id(item):
|
||||||
def get_from_attributes(item):
|
def get_from_attributes(item):
|
||||||
for p in('identifier', 'group'):
|
for p in ('identifier', 'group'):
|
||||||
value = item.get(p)
|
value = item.get(p)
|
||||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||||
return value
|
return value
|
||||||
|
@ -981,7 +981,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
group_id = self._search_regex(
|
group_id = self._search_regex(
|
||||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||||
webpage, 'group id', default=None)
|
webpage, 'group id', default=None)
|
||||||
if playlist_id:
|
if group_id:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||||
ie=BBCCoUkIE.ie_key())
|
ie=BBCCoUkIE.ie_key())
|
||||||
|
@ -1092,10 +1092,26 @@ class BBCIE(BBCCoUkIE):
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||||
'bbcthree config', default='{}'),
|
'bbcthree config', default='{}'),
|
||||||
playlist_id, transform_source=js_to_json, fatal=False)
|
playlist_id, transform_source=js_to_json, fatal=False) or {}
|
||||||
if bbc3_config:
|
payload = bbc3_config.get('payload') or {}
|
||||||
|
if payload:
|
||||||
|
clip = payload.get('currentClip') or {}
|
||||||
|
clip_vpid = clip.get('vpid')
|
||||||
|
clip_title = clip.get('title')
|
||||||
|
if clip_vpid and clip_title:
|
||||||
|
formats, subtitles = self._download_media_selector(clip_vpid)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return {
|
||||||
|
'id': clip_vpid,
|
||||||
|
'title': clip_title,
|
||||||
|
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
|
||||||
|
'description': clip.get('description'),
|
||||||
|
'duration': parse_duration(clip.get('duration')),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
bbc3_playlist = try_get(
|
bbc3_playlist = try_get(
|
||||||
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
payload, lambda x: x['content']['bbcMedia']['playlist'],
|
||||||
dict)
|
dict)
|
||||||
if bbc3_playlist:
|
if bbc3_playlist:
|
||||||
playlist_title = bbc3_playlist.get('title') or playlist_title
|
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||||
|
@ -1118,6 +1134,39 @@ class BBCIE(BBCCoUkIE):
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
initial_data = self._parse_json(self._search_regex(
|
||||||
|
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||||
|
'preload state', default='{}'), playlist_id, fatal=False)
|
||||||
|
if initial_data:
|
||||||
|
def parse_media(media):
|
||||||
|
if not media:
|
||||||
|
return
|
||||||
|
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
|
||||||
|
item_id = item.get('id')
|
||||||
|
item_title = item.get('title')
|
||||||
|
if not (item_id and item_title):
|
||||||
|
continue
|
||||||
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
entries.append({
|
||||||
|
'id': item_id,
|
||||||
|
'title': item_title,
|
||||||
|
'thumbnail': item.get('holdingImageUrl'),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
for resp in (initial_data.get('data') or {}).values():
|
||||||
|
name = resp.get('name')
|
||||||
|
if name == 'media-experience':
|
||||||
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
|
elif name == 'article':
|
||||||
|
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||||
|
if block.get('type') != 'media':
|
||||||
|
continue
|
||||||
|
parse_media(block.get('model'))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
|
|
|
@ -1,194 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
clean_html,
|
|
||||||
compat_str,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
try_get,
|
|
||||||
urljoin,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BeamProBaseIE(InfoExtractor):
|
|
||||||
_API_BASE = 'https://mixer.com/api/v1'
|
|
||||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
|
||||||
|
|
||||||
def _extract_channel_info(self, chan):
|
|
||||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
|
||||||
return {
|
|
||||||
'uploader': chan.get('token') or try_get(
|
|
||||||
chan, lambda x: x['user']['username'], compat_str),
|
|
||||||
'uploader_id': compat_str(user_id) if user_id else None,
|
|
||||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class BeamProLiveIE(BeamProBaseIE):
|
|
||||||
IE_NAME = 'Mixer:live'
|
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://mixer.com/niterhayven',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '261562',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
|
|
||||||
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
|
|
||||||
'thumbnail': r're:https://.*\.jpg$',
|
|
||||||
'timestamp': 1483477281,
|
|
||||||
'upload_date': '20170103',
|
|
||||||
'uploader': 'niterhayven',
|
|
||||||
'uploader_id': '373396',
|
|
||||||
'age_limit': 18,
|
|
||||||
'is_live': True,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'skip': 'niterhayven is offline',
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
channel_name = self._match_id(url)
|
|
||||||
|
|
||||||
chan = self._download_json(
|
|
||||||
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
|
||||||
|
|
||||||
if chan.get('online') is False:
|
|
||||||
raise ExtractorError(
|
|
||||||
'{0} is offline'.format(channel_name), expected=True)
|
|
||||||
|
|
||||||
channel_id = chan['id']
|
|
||||||
|
|
||||||
def manifest_url(kind):
|
|
||||||
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
|
||||||
fatal=False)
|
|
||||||
formats.extend(self._extract_smil_formats(
|
|
||||||
manifest_url('smil'), channel_name, fatal=False))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': compat_str(chan.get('id') or channel_name),
|
|
||||||
'title': self._live_title(chan.get('name') or channel_name),
|
|
||||||
'description': clean_html(chan.get('description')),
|
|
||||||
'thumbnail': try_get(
|
|
||||||
chan, lambda x: x['thumbnail']['url'], compat_str),
|
|
||||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
|
||||||
'is_live': True,
|
|
||||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
info.update(self._extract_channel_info(chan))
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class BeamProVodIE(BeamProBaseIE):
|
|
||||||
IE_NAME = 'Mixer:vod'
|
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://mixer.com/willow8714?vod=2259830',
|
|
||||||
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2259830',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'willow8714\'s Channel',
|
|
||||||
'duration': 6828.15,
|
|
||||||
'thumbnail': r're:https://.*source\.png$',
|
|
||||||
'timestamp': 1494046474,
|
|
||||||
'upload_date': '20170506',
|
|
||||||
'uploader': 'willow8714',
|
|
||||||
'uploader_id': '6085379',
|
|
||||||
'age_limit': 13,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _extract_format(vod, vod_type):
|
|
||||||
if not vod.get('baseUrl'):
|
|
||||||
return []
|
|
||||||
|
|
||||||
if vod_type == 'hls':
|
|
||||||
filename, protocol = 'manifest.m3u8', 'm3u8_native'
|
|
||||||
elif vod_type == 'raw':
|
|
||||||
filename, protocol = 'source.mp4', 'https'
|
|
||||||
else:
|
|
||||||
assert False
|
|
||||||
|
|
||||||
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
|
|
||||||
|
|
||||||
format_id = [vod_type]
|
|
||||||
if isinstance(data.get('Height'), compat_str):
|
|
||||||
format_id.append('%sp' % data['Height'])
|
|
||||||
|
|
||||||
return [{
|
|
||||||
'url': urljoin(vod['baseUrl'], filename),
|
|
||||||
'format_id': '-'.join(format_id),
|
|
||||||
'ext': 'mp4',
|
|
||||||
'protocol': protocol,
|
|
||||||
'width': int_or_none(data.get('Width')),
|
|
||||||
'height': int_or_none(data.get('Height')),
|
|
||||||
'fps': int_or_none(data.get('Fps')),
|
|
||||||
'tbr': int_or_none(data.get('Bitrate'), 1000),
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
vod_id = self._match_id(url)
|
|
||||||
|
|
||||||
vod_info = self._download_json(
|
|
||||||
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
|
|
||||||
|
|
||||||
state = vod_info.get('state')
|
|
||||||
if state != 'AVAILABLE':
|
|
||||||
raise ExtractorError(
|
|
||||||
'VOD %s is not available (state: %s)' % (vod_id, state),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
thumbnail_url = None
|
|
||||||
|
|
||||||
for vod in vod_info['vods']:
|
|
||||||
vod_type = vod.get('format')
|
|
||||||
if vod_type in ('hls', 'raw'):
|
|
||||||
formats.extend(self._extract_format(vod, vod_type))
|
|
||||||
elif vod_type == 'thumbnail':
|
|
||||||
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': vod_id,
|
|
||||||
'title': vod_info.get('name') or vod_id,
|
|
||||||
'duration': float_or_none(vod_info.get('duration')),
|
|
||||||
'thumbnail': thumbnail_url,
|
|
||||||
'timestamp': parse_iso8601(vod_info.get('createdAt')),
|
|
||||||
'view_count': int_or_none(vod_info.get('viewsTotal')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
|
|
||||||
|
|
||||||
return info
|
|
|
@ -25,8 +25,8 @@ class BellMediaIE(InfoExtractor):
|
||||||
etalk|
|
etalk|
|
||||||
marilyn
|
marilyn
|
||||||
)\.ca|
|
)\.ca|
|
||||||
much\.com
|
(?:much|cp24)\.com
|
||||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||||
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
||||||
|
@ -62,6 +62,9 @@ class BellMediaIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.etalk.ca/video?videoid=663455',
|
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cp24.com/video?clipId=1982548',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_DOMAINS = {
|
_DOMAINS = {
|
||||||
'thecomedynetwork': 'comedy',
|
'thecomedynetwork': 'comedy',
|
||||||
|
|
|
@ -3,10 +3,11 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
from ..utils import (
|
from ..compat import (
|
||||||
HEADRequest,
|
compat_b64decode,
|
||||||
int_or_none,
|
compat_urllib_parse_unquote,
|
||||||
)
|
)
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class BIQLEIE(InfoExtractor):
|
class BIQLEIE(InfoExtractor):
|
||||||
|
@ -47,9 +48,16 @@ class BIQLEIE(InfoExtractor):
|
||||||
if VKIE.suitable(embed_url):
|
if VKIE.suitable(embed_url):
|
||||||
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
||||||
|
|
||||||
self._request_webpage(
|
embed_page = self._download_webpage(
|
||||||
HEADRequest(embed_url), video_id, headers={'Referer': url})
|
embed_url, video_id, headers={'Referer': url})
|
||||||
video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
|
video_ext = self._get_cookies(embed_url).get('video_ext')
|
||||||
|
if video_ext:
|
||||||
|
video_ext = compat_urllib_parse_unquote(video_ext.value)
|
||||||
|
if not video_ext:
|
||||||
|
video_ext = compat_b64decode(self._search_regex(
|
||||||
|
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
|
||||||
|
embed_page, 'video_ext')).decode()
|
||||||
|
video_id, sig, _, access_token = video_ext.split(':')
|
||||||
item = self._download_json(
|
item = self._download_json(
|
||||||
'https://api.vk.com/method/video.get', video_id,
|
'https://api.vk.com/method/video.get', video_id,
|
||||||
headers={'User-Agent': 'okhttp/3.4.1'}, query={
|
headers={'User-Agent': 'okhttp/3.4.1'}, query={
|
||||||
|
|
98
youtube_dl/extractor/box.py
Normal file
98
youtube_dl/extractor/box.py
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_iso8601,
|
||||||
|
# try_get,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BoxIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||||
|
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '510727257538',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
|
||||||
|
'uploader': 'MLS Video',
|
||||||
|
'timestamp': 1566320259,
|
||||||
|
'upload_date': '20190820',
|
||||||
|
'uploader_id': '235196876',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
shared_name, file_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
webpage = self._download_webpage(url, file_id)
|
||||||
|
request_token = self._parse_json(self._search_regex(
|
||||||
|
r'Box\.config\s*=\s*({.+?});', webpage,
|
||||||
|
'Box config'), file_id)['requestToken']
|
||||||
|
access_token = self._download_json(
|
||||||
|
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||||
|
'Downloading token JSON metadata',
|
||||||
|
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'X-Request-Token': request_token,
|
||||||
|
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||||
|
})[file_id]['read']
|
||||||
|
shared_link = 'https://app.box.com/s/' + shared_name
|
||||||
|
f = self._download_json(
|
||||||
|
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||||
|
'Downloading file JSON metadata', headers={
|
||||||
|
'Authorization': 'Bearer ' + access_token,
|
||||||
|
'BoxApi': 'shared_link=' + shared_link,
|
||||||
|
'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
|
||||||
|
}, query={
|
||||||
|
'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
|
||||||
|
})
|
||||||
|
title = f['name']
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'access_token': access_token,
|
||||||
|
'shared_link': shared_link
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
# for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
|
||||||
|
# entry_url_template = try_get(
|
||||||
|
# entry, lambda x: x['content']['url_template'])
|
||||||
|
# if not entry_url_template:
|
||||||
|
# continue
|
||||||
|
# representation = entry.get('representation')
|
||||||
|
# if representation == 'dash':
|
||||||
|
# TODO: append query to every fragment URL
|
||||||
|
# formats.extend(self._extract_mpd_formats(
|
||||||
|
# entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
|
||||||
|
# file_id, query=query))
|
||||||
|
|
||||||
|
authenticated_download_url = f.get('authenticated_download_url')
|
||||||
|
if authenticated_download_url and f.get('is_download_available'):
|
||||||
|
formats.append({
|
||||||
|
'ext': f.get('extension') or determine_ext(title),
|
||||||
|
'filesize': f.get('size'),
|
||||||
|
'format_id': 'download',
|
||||||
|
'url': update_url_query(authenticated_download_url, query),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
creator = f.get('created_by') or {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': file_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'description': f.get('description') or None,
|
||||||
|
'uploader': creator.get('name'),
|
||||||
|
'timestamp': parse_iso8601(f.get('created_at')),
|
||||||
|
'uploader_id': creator.get('id'),
|
||||||
|
}
|
|
@ -5,32 +5,34 @@ import base64
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
compat_HTTPError,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
clean_html,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
js_to_json,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
str_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
update_url_query,
|
|
||||||
clean_html,
|
|
||||||
mimetype2ext,
|
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -145,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url(cls, object_str):
|
def _build_brightcove_url(cls, object_str):
|
||||||
"""
|
"""
|
||||||
Build a Brightcove url from a xml string containing
|
Build a Brightcove url from a xml string containing
|
||||||
<object class="BrightcoveExperience">{params}</object>
|
<object class="BrightcoveExperience">{params}</object>
|
||||||
|
@ -215,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
return cls._make_brightcove_url(params)
|
return cls._make_brightcove_url(params)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url_from_js(cls, object_js):
|
def _build_brightcove_url_from_js(cls, object_js):
|
||||||
# The layout of JS is as follows:
|
# The layout of JS is as follows:
|
||||||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||||
# // build Brightcove <object /> XML
|
# // build Brightcove <object /> XML
|
||||||
|
@ -270,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
).+?>\s*</object>''',
|
).+?>\s*</object>''',
|
||||||
webpage)
|
webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
|
||||||
|
|
||||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return list(filter(None, [
|
return list(filter(None, [
|
||||||
cls._build_brighcove_url_from_js(custom_bc)
|
cls._build_brightcove_url_from_js(custom_bc)
|
||||||
for custom_bc in matches]))
|
for custom_bc in matches]))
|
||||||
return [src for _, src in re.findall(
|
return [src for _, src in re.findall(
|
||||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||||
|
@ -424,7 +426,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||||
# [2] looks like:
|
# [2] looks like:
|
||||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||||
r'''(?isx)
|
r'''(?isx)
|
||||||
(<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
(<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
||||||
(?:.*?
|
(?:.*?
|
||||||
(<script[^>]+
|
(<script[^>]+
|
||||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||||
|
@ -553,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE):
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for text_track in json_data.get('text_tracks', []):
|
for text_track in json_data.get('text_tracks', []):
|
||||||
if text_track.get('src'):
|
if text_track.get('kind') != 'captions':
|
||||||
subtitles.setdefault(text_track.get('srclang'), []).append({
|
continue
|
||||||
'url': text_track['src'],
|
text_track_url = url_or_none(text_track.get('src'))
|
||||||
})
|
if not text_track_url:
|
||||||
|
continue
|
||||||
|
lang = (str_or_none(text_track.get('srclang'))
|
||||||
|
or str_or_none(text_track.get('label')) or 'en').lower()
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'url': text_track_url,
|
||||||
|
})
|
||||||
|
|
||||||
is_live = False
|
is_live = False
|
||||||
duration = float_or_none(json_data.get('duration'), 1000)
|
duration = float_or_none(json_data.get('duration'), 1000)
|
||||||
|
|
|
@ -5,10 +5,16 @@ import codecs
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_chr,
|
||||||
|
compat_ord,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
multipart_encode,
|
multipart_encode,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
random_birthday,
|
random_birthday,
|
||||||
|
@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
|
||||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||||
'view_count', default=None)
|
'view_count', default=None)
|
||||||
average_rating = self._search_regex(
|
average_rating = self._search_regex(
|
||||||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||||
webpage, 'rating', fatal=False, group='rating_value')
|
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||||
|
group='rating_value')
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
|
||||||
'age_limit': 18 if need_confirm_age else 0,
|
'age_limit': 18 if need_confirm_age else 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
||||||
|
def decrypt_file(a):
|
||||||
|
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
|
||||||
|
a = a.replace(p, '')
|
||||||
|
a = compat_urllib_parse_unquote(a)
|
||||||
|
b = []
|
||||||
|
for c in a:
|
||||||
|
f = compat_ord(c)
|
||||||
|
b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
|
||||||
|
a = ''.join(b)
|
||||||
|
a = a.replace('.cda.mp4', '')
|
||||||
|
for p in ('.2cda.pl', '.3cda.pl'):
|
||||||
|
a = a.replace(p, '.cda.pl')
|
||||||
|
if '/upstream' in a:
|
||||||
|
a = a.replace('/upstream', '.mp4/upstream')
|
||||||
|
return 'https://' + a
|
||||||
|
return 'https://' + a + '.mp4'
|
||||||
|
|
||||||
def extract_format(page, version):
|
def extract_format(page, version):
|
||||||
json_str = self._html_search_regex(
|
json_str = self._html_search_regex(
|
||||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||||
|
@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
|
||||||
video['file'] = codecs.decode(video['file'], 'rot_13')
|
video['file'] = codecs.decode(video['file'], 'rot_13')
|
||||||
if video['file'].endswith('adc.mp4'):
|
if video['file'].endswith('adc.mp4'):
|
||||||
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
||||||
|
elif not video['file'].startswith('http'):
|
||||||
|
video['file'] = decrypt_file(video['file'])
|
||||||
f = {
|
f = {
|
||||||
'url': video['file'],
|
'url': video['file'],
|
||||||
}
|
}
|
||||||
|
@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return info_dict
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
|
return merge_dicts(info_dict, info)
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import smuggle_url
|
||||||
|
@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class CNBCVideoIE(InfoExtractor):
|
class CNBCVideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
video_id = self._download_json(
|
||||||
video_id = self._search_regex(
|
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
'query': '''{
|
||||||
'video id')
|
page(path: "%s") {
|
||||||
|
vcpsId
|
||||||
|
}
|
||||||
|
}''' % path,
|
||||||
|
})['data']['page']['vcpsId']
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||||
CNBCIE.ie_key())
|
CNBCIE.ie_key())
|
||||||
|
|
|
@ -10,12 +10,13 @@ import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_cookiejar,
|
compat_cookiejar_Cookie,
|
||||||
compat_cookies,
|
compat_cookies,
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
@ -67,6 +68,7 @@ from ..utils import (
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
str_to_int,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
@ -623,9 +625,12 @@ class InfoExtractor(object):
|
||||||
url_or_request = update_url_query(url_or_request, query)
|
url_or_request = update_url_query(url_or_request, query)
|
||||||
if data is not None or headers:
|
if data is not None or headers:
|
||||||
url_or_request = sanitized_Request(url_or_request, data, headers)
|
url_or_request = sanitized_Request(url_or_request, data, headers)
|
||||||
|
exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
|
||||||
|
if hasattr(ssl, 'CertificateError'):
|
||||||
|
exceptions.append(ssl.CertificateError)
|
||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except tuple(exceptions) as err:
|
||||||
if isinstance(err, compat_urllib_error.HTTPError):
|
if isinstance(err, compat_urllib_error.HTTPError):
|
||||||
if self.__can_accept_status_code(err, expected_status):
|
if self.__can_accept_status_code(err, expected_status):
|
||||||
# Retain reference to error to prevent file object from
|
# Retain reference to error to prevent file object from
|
||||||
|
@ -1182,16 +1187,33 @@ class InfoExtractor(object):
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||||
json_ld = self._search_regex(
|
json_ld_list = list(re.finditer(JSON_LD_RE, html))
|
||||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
|
|
||||||
default = kwargs.get('default', NO_DEFAULT)
|
default = kwargs.get('default', NO_DEFAULT)
|
||||||
if not json_ld:
|
|
||||||
return default if default is not NO_DEFAULT else {}
|
|
||||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||||
# At the same time `default` may be passed that assumes `fatal=False`
|
# At the same time `default` may be passed that assumes `fatal=False`
|
||||||
# for _search_regex. Let's simulate the same behavior here as well.
|
# for _search_regex. Let's simulate the same behavior here as well.
|
||||||
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||||
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
json_ld = []
|
||||||
|
for mobj in json_ld_list:
|
||||||
|
json_ld_item = self._parse_json(
|
||||||
|
mobj.group('json_ld'), video_id, fatal=fatal)
|
||||||
|
if not json_ld_item:
|
||||||
|
continue
|
||||||
|
if isinstance(json_ld_item, dict):
|
||||||
|
json_ld.append(json_ld_item)
|
||||||
|
elif isinstance(json_ld_item, (list, tuple)):
|
||||||
|
json_ld.extend(json_ld_item)
|
||||||
|
if json_ld:
|
||||||
|
json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||||
|
if json_ld:
|
||||||
|
return json_ld
|
||||||
|
if default is not NO_DEFAULT:
|
||||||
|
return default
|
||||||
|
elif fatal:
|
||||||
|
raise RegexNotFoundError('Unable to extract JSON-LD')
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||||
|
return {}
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
if isinstance(json_ld, compat_str):
|
if isinstance(json_ld, compat_str):
|
||||||
|
@ -1227,7 +1249,10 @@ class InfoExtractor(object):
|
||||||
interaction_type = is_e.get('interactionType')
|
interaction_type = is_e.get('interactionType')
|
||||||
if not isinstance(interaction_type, compat_str):
|
if not isinstance(interaction_type, compat_str):
|
||||||
continue
|
continue
|
||||||
interaction_count = int_or_none(is_e.get('userInteractionCount'))
|
# For interaction count some sites provide string instead of
|
||||||
|
# an integer (as per spec) with non digit characters (e.g. ",")
|
||||||
|
# so extracting count with more relaxed str_to_int
|
||||||
|
interaction_count = str_to_int(is_e.get('userInteractionCount'))
|
||||||
if interaction_count is None:
|
if interaction_count is None:
|
||||||
continue
|
continue
|
||||||
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
|
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
|
||||||
|
@ -1247,6 +1272,7 @@ class InfoExtractor(object):
|
||||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||||
'duration': parse_duration(e.get('duration')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
|
'uploader': str_or_none(e.get('author')),
|
||||||
'filesize': float_or_none(e.get('contentSize')),
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
'tbr': int_or_none(e.get('bitrate')),
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
'width': int_or_none(e.get('width')),
|
'width': int_or_none(e.get('width')),
|
||||||
|
@ -1256,10 +1282,10 @@ class InfoExtractor(object):
|
||||||
extract_interaction_statistic(e)
|
extract_interaction_statistic(e)
|
||||||
|
|
||||||
for e in json_ld:
|
for e in json_ld:
|
||||||
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
|
if '@context' in e:
|
||||||
item_type = e.get('@type')
|
item_type = e.get('@type')
|
||||||
if expected_type is not None and expected_type != item_type:
|
if expected_type is not None and expected_type != item_type:
|
||||||
return info
|
continue
|
||||||
if item_type in ('TVEpisode', 'Episode'):
|
if item_type in ('TVEpisode', 'Episode'):
|
||||||
episode_name = unescapeHTML(e.get('name'))
|
episode_name = unescapeHTML(e.get('name'))
|
||||||
info.update({
|
info.update({
|
||||||
|
@ -1293,11 +1319,17 @@ class InfoExtractor(object):
|
||||||
})
|
})
|
||||||
elif item_type == 'VideoObject':
|
elif item_type == 'VideoObject':
|
||||||
extract_video_object(e)
|
extract_video_object(e)
|
||||||
continue
|
if expected_type is None:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
break
|
||||||
video = e.get('video')
|
video = e.get('video')
|
||||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||||
extract_video_object(video)
|
extract_video_object(video)
|
||||||
break
|
if expected_type is None:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -1424,9 +1456,10 @@ class InfoExtractor(object):
|
||||||
try:
|
try:
|
||||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||||
return True
|
return True
|
||||||
except ExtractorError:
|
except ExtractorError as e:
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
'%s: %s URL is invalid, skipping: %s'
|
||||||
|
% (video_id, item, error_to_compat_str(e.cause)))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def http_scheme(self):
|
def http_scheme(self):
|
||||||
|
@ -1631,7 +1664,7 @@ class InfoExtractor(object):
|
||||||
# just the media without qualities renditions.
|
# just the media without qualities renditions.
|
||||||
# Fortunately, master playlist can be easily distinguished from media
|
# Fortunately, master playlist can be easily distinguished from media
|
||||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||||
# master playlist tags MUST NOT appear in a media playist and vice versa.
|
# master playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||||
# media playlist and MUST NOT appear in master playlist thus we can
|
# media playlist and MUST NOT appear in master playlist thus we can
|
||||||
# clearly detect media playlist with this criterion.
|
# clearly detect media playlist with this criterion.
|
||||||
|
@ -2340,6 +2373,8 @@ class InfoExtractor(object):
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
ism_doc, urlh = res
|
ism_doc, urlh = res
|
||||||
|
if ism_doc is None:
|
||||||
|
return []
|
||||||
|
|
||||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||||
|
|
||||||
|
@ -2478,16 +2513,18 @@ class InfoExtractor(object):
|
||||||
# amp-video and amp-audio are very similar to their HTML5 counterparts
|
# amp-video and amp-audio are very similar to their HTML5 counterparts
|
||||||
# so we wll include them right here (see
|
# so we wll include them right here (see
|
||||||
# https://www.ampproject.org/docs/reference/components/amp-video)
|
# https://www.ampproject.org/docs/reference/components/amp-video)
|
||||||
media_tags = [(media_tag, media_type, '')
|
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
|
||||||
for media_tag, media_type
|
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
|
||||||
in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
|
media_tags = [(media_tag, media_tag_name, media_type, '')
|
||||||
|
for media_tag, media_tag_name, media_type
|
||||||
|
in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
|
||||||
media_tags.extend(re.findall(
|
media_tags.extend(re.findall(
|
||||||
# We only allow video|audio followed by a whitespace or '>'.
|
# We only allow video|audio followed by a whitespace or '>'.
|
||||||
# Allowing more characters may end up in significant slow down (see
|
# Allowing more characters may end up in significant slow down (see
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
|
# https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
|
||||||
# http://www.porntrex.com/maps/videositemap.xml).
|
# http://www.porntrex.com/maps/videositemap.xml).
|
||||||
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
|
||||||
for media_tag, media_type, media_content in media_tags:
|
for media_tag, _, media_type, media_content in media_tags:
|
||||||
media_info = {
|
media_info = {
|
||||||
'formats': [],
|
'formats': [],
|
||||||
'subtitles': {},
|
'subtitles': {},
|
||||||
|
@ -2561,6 +2598,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
hdcore_sign = 'hdcore=3.7.0'
|
hdcore_sign = 'hdcore=3.7.0'
|
||||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||||
hds_host = hosts.get('hds')
|
hds_host = hosts.get('hds')
|
||||||
|
@ -2573,13 +2611,38 @@ class InfoExtractor(object):
|
||||||
for entry in f4m_formats:
|
for entry in f4m_formats:
|
||||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||||
formats.extend(f4m_formats)
|
formats.extend(f4m_formats)
|
||||||
|
|
||||||
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||||
hls_host = hosts.get('hls')
|
hls_host = hosts.get('hls')
|
||||||
if hls_host:
|
if hls_host:
|
||||||
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
|
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
|
||||||
|
http_host = hosts.get('http')
|
||||||
|
if http_host and m3u8_formats and 'hdnea=' not in m3u8_url:
|
||||||
|
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
|
||||||
|
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||||
|
qualities_length = len(qualities)
|
||||||
|
if len(m3u8_formats) in (qualities_length, qualities_length + 1):
|
||||||
|
i = 0
|
||||||
|
for f in m3u8_formats:
|
||||||
|
if f['vcodec'] != 'none':
|
||||||
|
for protocol in ('http', 'https'):
|
||||||
|
http_f = f.copy()
|
||||||
|
del http_f['manifest_url']
|
||||||
|
http_url = re.sub(
|
||||||
|
REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
|
||||||
|
http_f.update({
|
||||||
|
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||||
|
'url': http_url,
|
||||||
|
'protocol': protocol,
|
||||||
|
})
|
||||||
|
formats.append(http_f)
|
||||||
|
i += 1
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||||
|
@ -2818,7 +2881,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||||
path='/', secure=False, discard=False, rest={}, **kwargs):
|
path='/', secure=False, discard=False, rest={}, **kwargs):
|
||||||
cookie = compat_cookiejar.Cookie(
|
cookie = compat_cookiejar_Cookie(
|
||||||
0, name, value, port, port is not None, domain, True,
|
0, name, value, port, port is not None, domain, True,
|
||||||
domain.startswith('.'), path, True, secure, expire_time,
|
domain.startswith('.'), path, True, secure, expire_time,
|
||||||
discard, None, None, rest)
|
discard, None, None, rest)
|
||||||
|
|
|
@ -16,6 +16,8 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'uploader': 'gq',
|
'uploader': 'gq',
|
||||||
'upload_date': '20170321',
|
'upload_date': '20170321',
|
||||||
'timestamp': 1490126427,
|
'timestamp': 1490126427,
|
||||||
|
'description': 'How much grimmer would things be if these people were competent?',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# JS embed
|
# JS embed
|
||||||
|
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||||
'uploader': 'arstechnica',
|
'uploader': 'arstechnica',
|
||||||
'upload_date': '20150916',
|
'upload_date': '20150916',
|
||||||
'timestamp': 1442434955,
|
'timestamp': 1442434920,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||||
|
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for t, caption in video_info.get('captions', {}).items():
|
||||||
|
caption_url = caption.get('src')
|
||||||
|
if not (t in ('vtt', 'srt', 'tml') and caption_url):
|
||||||
|
continue
|
||||||
|
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'season': video_info.get('season_title'),
|
'season': video_info.get('season_title'),
|
||||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||||
'categories': video_info.get('categories'),
|
'categories': video_info.get('categories'),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
|
||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
return self._extract_series(url, webpage)
|
return self._extract_series(url, webpage)
|
||||||
else:
|
else:
|
||||||
params = self._extract_video_params(webpage, display_id)
|
video = try_get(self._parse_json(self._search_regex(
|
||||||
info = self._search_json_ld(
|
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||||
webpage, display_id, fatal=False)
|
'preload state', '{}'), display_id),
|
||||||
|
lambda x: x['transformed']['video'])
|
||||||
|
if video:
|
||||||
|
params = {'videoId': video['id']}
|
||||||
|
info = {'description': strip_or_none(video.get('description'))}
|
||||||
|
else:
|
||||||
|
params = self._extract_video_params(webpage, display_id)
|
||||||
|
info = self._search_json_ld(
|
||||||
|
webpage, display_id, fatal=False)
|
||||||
info.update(self._extract_video(params))
|
info.update(self._extract_video(params))
|
||||||
return info
|
return info
|
||||||
|
|
|
@ -13,6 +13,7 @@ from ..compat import (
|
||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
@ -25,9 +26,9 @@ from ..utils import (
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
merge_dicts,
|
||||||
remove_end,
|
remove_end,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unified_strdate,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
@ -136,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||||
# rtmp
|
# rtmp
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -157,11 +159,12 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '702409',
|
'id': '702409',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
|
'title': compat_str,
|
||||||
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
|
'description': compat_str,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'TV TOKYO',
|
'uploader': 'Re:Zero Partners',
|
||||||
'upload_date': '20160508',
|
'timestamp': 1462098900,
|
||||||
|
'upload_date': '20160501',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -172,12 +175,13 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '727589',
|
'id': '727589',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
|
'title': compat_str,
|
||||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
'description': compat_str,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Kadokawa Pictures Inc.',
|
'uploader': 'Kadokawa Pictures Inc.',
|
||||||
'upload_date': '20170118',
|
'timestamp': 1484130900,
|
||||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
'upload_date': '20170111',
|
||||||
|
'series': compat_str,
|
||||||
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
||||||
'season_number': 2,
|
'season_number': 2,
|
||||||
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
||||||
|
@ -200,10 +204,11 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '535080',
|
'id': '535080',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
'title': compat_str,
|
||||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
'description': compat_str,
|
||||||
'uploader': 'Marvelous AQL Inc.',
|
'uploader': 'Marvelous AQL Inc.',
|
||||||
'upload_date': '20091021',
|
'timestamp': 1255512600,
|
||||||
|
'upload_date': '20091014',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# Just test metadata extraction
|
# Just test metadata extraction
|
||||||
|
@ -224,15 +229,17 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||||
# just test metadata extraction
|
# just test metadata extraction
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
}, {
|
}, {
|
||||||
# A video with a vastly different season name compared to the series name
|
# A video with a vastly different season name compared to the series name
|
||||||
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
|
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '590532',
|
'id': '590532',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
|
'title': compat_str,
|
||||||
'description': 'Mahiro and Nyaruko talk about official certification.',
|
'description': compat_str,
|
||||||
'uploader': 'TV TOKYO',
|
'uploader': 'TV TOKYO',
|
||||||
|
'timestamp': 1330956000,
|
||||||
'upload_date': '20120305',
|
'upload_date': '20120305',
|
||||||
'series': 'Nyarko-san: Another Crawling Chaos',
|
'series': 'Nyarko-san: Another Crawling Chaos',
|
||||||
'season': 'Haiyoru! Nyaruani (ONA)',
|
'season': 'Haiyoru! Nyaruani (ONA)',
|
||||||
|
@ -442,23 +449,21 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
webpage, 'language', default=None, group='lang')
|
webpage, 'language', default=None, group='lang')
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
(r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
|
||||||
webpage, 'video_title')
|
r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
|
||||||
|
webpage, 'video_title', default=None)
|
||||||
|
if not video_title:
|
||||||
|
video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
|
||||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||||
video_description = (self._parse_json(self._html_search_regex(
|
video_description = (self._parse_json(self._html_search_regex(
|
||||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||||
if video_description:
|
if video_description:
|
||||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||||
video_upload_date = self._html_search_regex(
|
|
||||||
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
|
|
||||||
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
|
||||||
if video_upload_date:
|
|
||||||
video_upload_date = unified_strdate(video_upload_date)
|
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
# try looking for both an uploader that's a link and one that's not
|
# try looking for both an uploader that's a link and one that's not
|
||||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||||
webpage, 'video_uploader', fatal=False)
|
webpage, 'video_uploader', default=False)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for stream in media.get('streams', []):
|
for stream in media.get('streams', []):
|
||||||
|
@ -611,14 +616,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||||
webpage, 'season number', default=None))
|
webpage, 'season number', default=None))
|
||||||
|
|
||||||
return {
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
|
||||||
'series': series,
|
'series': series,
|
||||||
'season': season,
|
'season': season,
|
||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
|
@ -626,7 +632,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||||
|
|
|
@ -10,6 +10,8 @@ from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
merge_dicts,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor):
|
||||||
bc_attr['data-bcid'])
|
bc_attr['data-bcid'])
|
||||||
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
||||||
|
|
||||||
|
def add_referer(formats):
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {})['Referer'] = url
|
||||||
|
|
||||||
|
# As of 01.12.2020 this path looks to cover all cases making the rest
|
||||||
|
# of the code unnecessary
|
||||||
|
jwsetup = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
|
||||||
|
default='{}'),
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
if jwsetup:
|
||||||
|
info = self._parse_jwplayer_data(
|
||||||
|
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||||
|
base_url=url)
|
||||||
|
add_referer(info['formats'])
|
||||||
|
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
return merge_dicts(info, ld_info)
|
||||||
|
|
||||||
|
# Obsolete
|
||||||
# We first look for clipid, because clipprog always appears before
|
# We first look for clipid, because clipprog always appears before
|
||||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||||
|
@ -165,6 +187,7 @@ class CSpanIE(InfoExtractor):
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||||
|
add_referer(formats)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': '%s_%d' % (video_id, partnum + 1),
|
'id': '%s_%d' % (video_id, partnum + 1),
|
||||||
|
|
|
@ -32,7 +32,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_cookie_value(cookies, name):
|
def _get_cookie_value(cookies, name):
|
||||||
cookie = cookies.get('name')
|
cookie = cookies.get(name)
|
||||||
if cookie:
|
if cookie:
|
||||||
return cookie.value
|
return cookie.value
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ from .dplay import DPlayIE
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryNetworksDeIE(DPlayIE):
|
class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||||
|
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -29,7 +29,7 @@ class DRTVIE(InfoExtractor):
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
|
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
|
||||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z_-]+)
|
(?P<id>[\da-z_-]+)
|
||||||
'''
|
'''
|
||||||
|
@ -111,6 +111,9 @@ class DRTVIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
|
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/drtv/program/jagten_220924',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
|
||||||
|
|
||||||
title = get_item('title', preferred_langs) or video_id
|
title = get_item('title', preferred_langs) or video_id
|
||||||
description = get_item('description', preferred_langs)
|
description = get_item('description', preferred_langs)
|
||||||
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||||
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
||||||
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
||||||
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
||||||
|
@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnmail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
|
|
@ -15,7 +15,7 @@ from ..utils import (
|
||||||
class ExpressenIE(InfoExtractor):
|
class ExpressenIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?expressen\.se/
|
(?:www\.)?(?:expressen|di)\.se/
|
||||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||||
tv/(?:[^/]+/)*
|
tv/(?:[^/]+/)*
|
||||||
(?P<id>[^/?#&]+)
|
(?P<id>[^/?#&]+)
|
||||||
|
@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return [
|
return [
|
||||||
mobj.group('url') for mobj in re.finditer(
|
mobj.group('url') for mobj in re.finditer(
|
||||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
||||||
webpage)]
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -30,12 +30,15 @@ from .adobetv import (
|
||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
from .aenetworks import (
|
from .aenetworks import (
|
||||||
AENetworksIE,
|
AENetworksIE,
|
||||||
|
AENetworksCollectionIE,
|
||||||
|
AENetworksShowIE,
|
||||||
HistoryTopicIE,
|
HistoryTopicIE,
|
||||||
)
|
)
|
||||||
from .afreecatv import AfreecaTVIE
|
from .afreecatv import AfreecaTVIE
|
||||||
from .airmozilla import AirMozillaIE
|
from .airmozilla import AirMozillaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
|
from .amara import AmaraIE
|
||||||
from .amcnetworks import AMCNetworksIE
|
from .amcnetworks import AMCNetworksIE
|
||||||
from .americastestkitchen import AmericasTestKitchenIE
|
from .americastestkitchen import AmericasTestKitchenIE
|
||||||
from .animeondemand import AnimeOnDemandIE
|
from .animeondemand import AnimeOnDemandIE
|
||||||
|
@ -58,7 +61,7 @@ from .ard import (
|
||||||
ARDMediathekIE,
|
ARDMediathekIE,
|
||||||
)
|
)
|
||||||
from .arte import (
|
from .arte import (
|
||||||
ArteTVPlus7IE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
|
@ -88,10 +91,6 @@ from .bbc import (
|
||||||
BBCCoUkPlaylistIE,
|
BBCCoUkPlaylistIE,
|
||||||
BBCIE,
|
BBCIE,
|
||||||
)
|
)
|
||||||
from .beampro import (
|
|
||||||
BeamProLiveIE,
|
|
||||||
BeamProVodIE,
|
|
||||||
)
|
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
from .behindkink import BehindKinkIE
|
from .behindkink import BehindKinkIE
|
||||||
from .bellmedia import BellMediaIE
|
from .bellmedia import BellMediaIE
|
||||||
|
@ -122,6 +121,7 @@ from .blinkx import BlinkxIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bostonglobe import BostonGlobeIE
|
from .bostonglobe import BostonGlobeIE
|
||||||
|
from .box import BoxIE
|
||||||
from .bpb import BpbIE
|
from .bpb import BpbIE
|
||||||
from .br import (
|
from .br import (
|
||||||
BRIE,
|
BRIE,
|
||||||
|
@ -392,6 +392,7 @@ from .frontendmasters import (
|
||||||
FrontendMastersLessonIE,
|
FrontendMastersLessonIE,
|
||||||
FrontendMastersCourseIE
|
FrontendMastersCourseIE
|
||||||
)
|
)
|
||||||
|
from .fujitv import FujiTVFODPlus7IE
|
||||||
from .funimation import FunimationIE
|
from .funimation import FunimationIE
|
||||||
from .funk import FunkIE
|
from .funk import FunkIE
|
||||||
from .fusion import FusionIE
|
from .fusion import FusionIE
|
||||||
|
@ -530,6 +531,10 @@ from .laola1tv import (
|
||||||
EHFTVIE,
|
EHFTVIE,
|
||||||
ITTFIE,
|
ITTFIE,
|
||||||
)
|
)
|
||||||
|
from .lbry import (
|
||||||
|
LBRYIE,
|
||||||
|
LBRYChannelIE,
|
||||||
|
)
|
||||||
from .lci import LCIIE
|
from .lci import LCIIE
|
||||||
from .lcp import (
|
from .lcp import (
|
||||||
LcpPlayIE,
|
LcpPlayIE,
|
||||||
|
@ -604,6 +609,7 @@ from .markiza import (
|
||||||
from .massengeschmacktv import MassengeschmackTVIE
|
from .massengeschmacktv import MassengeschmackTVIE
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
|
from .medaltv import MedalTVIE
|
||||||
from .mediaset import MediasetIE
|
from .mediaset import MediasetIE
|
||||||
from .mediasite import (
|
from .mediasite import (
|
||||||
MediasiteIE,
|
MediasiteIE,
|
||||||
|
@ -637,7 +643,10 @@ from .mixcloud import (
|
||||||
from .mlb import MLBIE
|
from .mlb import MLBIE
|
||||||
from .mnet import MnetIE
|
from .mnet import MnetIE
|
||||||
from .moevideo import MoeVideoIE
|
from .moevideo import MoeVideoIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import (
|
||||||
|
MofosexIE,
|
||||||
|
MofosexEmbedIE,
|
||||||
|
)
|
||||||
from .mojvideo import MojvideoIE
|
from .mojvideo import MojvideoIE
|
||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motherless import (
|
from .motherless import (
|
||||||
|
@ -778,6 +787,7 @@ from .ntvru import NTVRuIE
|
||||||
from .nytimes import (
|
from .nytimes import (
|
||||||
NYTimesIE,
|
NYTimesIE,
|
||||||
NYTimesArticleIE,
|
NYTimesArticleIE,
|
||||||
|
NYTimesCookingIE,
|
||||||
)
|
)
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .nzz import NZZIE
|
from .nzz import NZZIE
|
||||||
|
@ -802,6 +812,16 @@ from .orf import (
|
||||||
ORFFM4IE,
|
ORFFM4IE,
|
||||||
ORFFM4StoryIE,
|
ORFFM4StoryIE,
|
||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
|
ORFOE3IE,
|
||||||
|
ORFNOEIE,
|
||||||
|
ORFWIEIE,
|
||||||
|
ORFBGLIE,
|
||||||
|
ORFOOEIE,
|
||||||
|
ORFSTMIE,
|
||||||
|
ORFKTNIE,
|
||||||
|
ORFSBGIE,
|
||||||
|
ORFTIRIE,
|
||||||
|
ORFVBGIE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
from .outsidetv import OutsideTVIE
|
from .outsidetv import OutsideTVIE
|
||||||
|
@ -830,6 +850,10 @@ from .picarto import (
|
||||||
)
|
)
|
||||||
from .piksel import PikselIE
|
from .piksel import PikselIE
|
||||||
from .pinkbike import PinkbikeIE
|
from .pinkbike import PinkbikeIE
|
||||||
|
from .pinterest import (
|
||||||
|
PinterestIE,
|
||||||
|
PinterestCollectionIE,
|
||||||
|
)
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .platzi import (
|
from .platzi import (
|
||||||
PlatziIE,
|
PlatziIE,
|
||||||
|
@ -906,7 +930,9 @@ from .rbmaradio import RBMARadioIE
|
||||||
from .rds import RDSIE
|
from .rds import RDSIE
|
||||||
from .redbulltv import (
|
from .redbulltv import (
|
||||||
RedBullTVIE,
|
RedBullTVIE,
|
||||||
|
RedBullEmbedIE,
|
||||||
RedBullTVRrnContentIE,
|
RedBullTVRrnContentIE,
|
||||||
|
RedBullIE,
|
||||||
)
|
)
|
||||||
from .reddit import (
|
from .reddit import (
|
||||||
RedditIE,
|
RedditIE,
|
||||||
|
@ -943,6 +969,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
|
||||||
from .rtvnh import RTVNHIE
|
from .rtvnh import RTVNHIE
|
||||||
from .rtvs import RTVSIE
|
from .rtvs import RTVSIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
|
from .rumble import RumbleEmbedIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
@ -993,6 +1020,16 @@ from .shared import (
|
||||||
from .showroomlive import ShowRoomLiveIE
|
from .showroomlive import ShowRoomLiveIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .sixplay import SixPlayIE
|
from .sixplay import SixPlayIE
|
||||||
|
from .skyit import (
|
||||||
|
SkyItPlayerIE,
|
||||||
|
SkyItVideoIE,
|
||||||
|
SkyItVideoLiveIE,
|
||||||
|
SkyItIE,
|
||||||
|
SkyItAcademyIE,
|
||||||
|
SkyItArteIE,
|
||||||
|
CieloTVItIE,
|
||||||
|
TV8ItIE,
|
||||||
|
)
|
||||||
from .skylinewebcams import SkylineWebcamsIE
|
from .skylinewebcams import SkylineWebcamsIE
|
||||||
from .skynewsarabia import (
|
from .skynewsarabia import (
|
||||||
SkyNewsArabiaIE,
|
SkyNewsArabiaIE,
|
||||||
|
@ -1005,12 +1042,6 @@ from .sky import (
|
||||||
from .slideshare import SlideshareIE
|
from .slideshare import SlideshareIE
|
||||||
from .slideslive import SlidesLiveIE
|
from .slideslive import SlidesLiveIE
|
||||||
from .slutload import SlutloadIE
|
from .slutload import SlutloadIE
|
||||||
from .smotri import (
|
|
||||||
SmotriIE,
|
|
||||||
SmotriCommunityIE,
|
|
||||||
SmotriUserIE,
|
|
||||||
SmotriBroadcastIE,
|
|
||||||
)
|
|
||||||
from .snotr import SnotrIE
|
from .snotr import SnotrIE
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
from .sonyliv import SonyLIVIE
|
from .sonyliv import SonyLIVIE
|
||||||
|
@ -1039,8 +1070,7 @@ from .spankbang import (
|
||||||
SpankBangPlaylistIE,
|
SpankBangPlaylistIE,
|
||||||
)
|
)
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
|
||||||
from .spike import (
|
from .spike import (
|
||||||
BellatorIE,
|
BellatorIE,
|
||||||
ParamountNetworkIE,
|
ParamountNetworkIE,
|
||||||
|
@ -1049,6 +1079,12 @@ from .stitcher import StitcherIE
|
||||||
from .sport5 import Sport5IE
|
from .sport5 import Sport5IE
|
||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
|
from .spreaker import (
|
||||||
|
SpreakerIE,
|
||||||
|
SpreakerPageIE,
|
||||||
|
SpreakerShowIE,
|
||||||
|
SpreakerShowPageIE,
|
||||||
|
)
|
||||||
from .springboardplatform import SpringboardPlatformIE
|
from .springboardplatform import SpringboardPlatformIE
|
||||||
from .sprout import SproutIE
|
from .sprout import SproutIE
|
||||||
from .srgssr import (
|
from .srgssr import (
|
||||||
|
@ -1110,6 +1146,7 @@ from .telequebec import (
|
||||||
TeleQuebecSquatIE,
|
TeleQuebecSquatIE,
|
||||||
TeleQuebecEmissionIE,
|
TeleQuebecEmissionIE,
|
||||||
TeleQuebecLiveIE,
|
TeleQuebecLiveIE,
|
||||||
|
TeleQuebecVideoIE,
|
||||||
)
|
)
|
||||||
from .teletask import TeleTaskIE
|
from .teletask import TeleTaskIE
|
||||||
from .telewebion import TelewebionIE
|
from .telewebion import TelewebionIE
|
||||||
|
@ -1146,7 +1183,10 @@ from .tnaflix import (
|
||||||
EMPFlixIE,
|
EMPFlixIE,
|
||||||
MovieFapIE,
|
MovieFapIE,
|
||||||
)
|
)
|
||||||
from .toggle import ToggleIE
|
from .toggle import (
|
||||||
|
ToggleIE,
|
||||||
|
MeWatchIE,
|
||||||
|
)
|
||||||
from .tonline import TOnlineIE
|
from .tonline import TOnlineIE
|
||||||
from .toongoggles import ToonGogglesIE
|
from .toongoggles import ToonGogglesIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
|
@ -1179,7 +1219,10 @@ from .tv2dk import (
|
||||||
from .tv2hu import TV2HuIE
|
from .tv2hu import TV2HuIE
|
||||||
from .tv4 import TV4IE
|
from .tv4 import TV4IE
|
||||||
from .tv5mondeplus import TV5MondePlusIE
|
from .tv5mondeplus import TV5MondePlusIE
|
||||||
from .tva import TVAIE
|
from .tva import (
|
||||||
|
TVAIE,
|
||||||
|
QubIE,
|
||||||
|
)
|
||||||
from .tvanouvelles import (
|
from .tvanouvelles import (
|
||||||
TVANouvellesIE,
|
TVANouvellesIE,
|
||||||
TVANouvellesArticleIE,
|
TVANouvellesArticleIE,
|
||||||
|
@ -1188,6 +1231,7 @@ from .tvc import (
|
||||||
TVCIE,
|
TVCIE,
|
||||||
TVCArticleIE,
|
TVCArticleIE,
|
||||||
)
|
)
|
||||||
|
from .tver import TVerIE
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
from .tvn24 import TVN24IE
|
from .tvn24 import TVN24IE
|
||||||
|
@ -1217,14 +1261,11 @@ from .twentymin import TwentyMinutenIE
|
||||||
from .twentythreevideo import TwentyThreeVideoIE
|
from .twentythreevideo import TwentyThreeVideoIE
|
||||||
from .twitcasting import TwitCastingIE
|
from .twitcasting import TwitCastingIE
|
||||||
from .twitch import (
|
from .twitch import (
|
||||||
TwitchVideoIE,
|
|
||||||
TwitchChapterIE,
|
|
||||||
TwitchVodIE,
|
TwitchVodIE,
|
||||||
TwitchProfileIE,
|
TwitchCollectionIE,
|
||||||
TwitchAllVideosIE,
|
TwitchVideosIE,
|
||||||
TwitchUploadsIE,
|
TwitchVideosClipsIE,
|
||||||
TwitchPastBroadcastsIE,
|
TwitchVideosCollectionsIE,
|
||||||
TwitchHighlightsIE,
|
|
||||||
TwitchStreamIE,
|
TwitchStreamIE,
|
||||||
TwitchClipsIE,
|
TwitchClipsIE,
|
||||||
)
|
)
|
||||||
|
@ -1345,8 +1386,8 @@ from .vk import (
|
||||||
)
|
)
|
||||||
from .vlive import (
|
from .vlive import (
|
||||||
VLiveIE,
|
VLiveIE,
|
||||||
|
VLivePostIE,
|
||||||
VLiveChannelIE,
|
VLiveChannelIE,
|
||||||
VLivePlaylistIE
|
|
||||||
)
|
)
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
from .vodpl import VODPlIE
|
from .vodpl import VODPlIE
|
||||||
|
@ -1442,6 +1483,8 @@ from .yandexmusic import (
|
||||||
YandexMusicTrackIE,
|
YandexMusicTrackIE,
|
||||||
YandexMusicAlbumIE,
|
YandexMusicAlbumIE,
|
||||||
YandexMusicPlaylistIE,
|
YandexMusicPlaylistIE,
|
||||||
|
YandexMusicArtistTracksIE,
|
||||||
|
YandexMusicArtistAlbumsIE,
|
||||||
)
|
)
|
||||||
from .yandexvideo import YandexVideoIE
|
from .yandexvideo import YandexVideoIE
|
||||||
from .yapfiles import YapFilesIE
|
from .yapfiles import YapFilesIE
|
||||||
|
@ -1463,21 +1506,19 @@ from .yourporn import YourPornIE
|
||||||
from .yourupload import YourUploadIE
|
from .yourupload import YourUploadIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
YoutubeHistoryIE,
|
YoutubeHistoryIE,
|
||||||
YoutubeLiveIE,
|
YoutubeTabIE,
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubePlaylistsIE,
|
|
||||||
YoutubeRecommendedIE,
|
YoutubeRecommendedIE,
|
||||||
YoutubeSearchDateIE,
|
YoutubeSearchDateIE,
|
||||||
YoutubeSearchIE,
|
YoutubeSearchIE,
|
||||||
YoutubeSearchURLIE,
|
#YoutubeSearchURLIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTruncatedIDIE,
|
YoutubeTruncatedIDIE,
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeUserIE,
|
YoutubeYtBeIE,
|
||||||
|
YoutubeYtUserIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .zapiks import ZapiksIE
|
from .zapiks import ZapiksIE
|
||||||
|
|
|
@ -54,8 +54,6 @@ class FacebookIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
IE_NAME = 'facebook'
|
IE_NAME = 'facebook'
|
||||||
|
|
||||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
|
||||||
|
|
||||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||||
|
|
||||||
|
@ -306,9 +304,7 @@ class FacebookIE(InfoExtractor):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
|
def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
|
||||||
req = sanitized_Request(url)
|
webpage = self._download_webpage(url, video_id)
|
||||||
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
|
||||||
webpage = self._download_webpage(req, video_id)
|
|
||||||
|
|
||||||
video_data = None
|
video_data = None
|
||||||
|
|
||||||
|
@ -466,15 +462,18 @@ class FacebookIE(InfoExtractor):
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
if '/posts/' in url:
|
if '/posts/' in url:
|
||||||
entries = [
|
video_id_json = self._search_regex(
|
||||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
|
||||||
for vid in self._parse_json(
|
default='')
|
||||||
self._search_regex(
|
if video_id_json:
|
||||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
entries = [
|
||||||
webpage, 'video ids', group='ids'),
|
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
||||||
video_id)]
|
for vid in self._parse_json(video_id_json, video_id)]
|
||||||
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
return self.playlist_result(entries, video_id)
|
# Single Video?
|
||||||
|
video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
|
||||||
|
return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||||
else:
|
else:
|
||||||
_, info_dict = self._extract_from_url(
|
_, info_dict = self._extract_from_url(
|
||||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||||
|
|
|
@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'upload_date': '20160907',
|
'upload_date': '20160907',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||||
|
|
||||||
upload_date_str = self._search_regex(
|
upload_date_str = self._search_regex(
|
||||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||||
|
@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'formats': [{
|
'formats': [{
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
|
|
||||||
|
@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
|
||||||
|
|
||||||
is_live = None
|
is_live = None
|
||||||
|
|
||||||
formats = []
|
videos = []
|
||||||
for video in info['videos']:
|
|
||||||
if video['statut'] != 'ONLINE':
|
for video in (info.get('videos') or []):
|
||||||
|
if video.get('statut') != 'ONLINE':
|
||||||
continue
|
continue
|
||||||
video_url = video['url']
|
if not video.get('url'):
|
||||||
|
continue
|
||||||
|
videos.append(video)
|
||||||
|
|
||||||
|
if not videos:
|
||||||
|
for device_type in ['desktop', 'mobile']:
|
||||||
|
fallback_info = self._download_json(
|
||||||
|
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||||
|
video_id, 'Downloading fallback %s video JSON' % device_type, query={
|
||||||
|
'device_type': device_type,
|
||||||
|
'browser': 'chrome',
|
||||||
|
}, fatal=False)
|
||||||
|
|
||||||
|
if fallback_info and fallback_info.get('video'):
|
||||||
|
videos.append(fallback_info['video'])
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video in videos:
|
||||||
|
video_url = video.get('url')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
if is_live is None:
|
if is_live is None:
|
||||||
is_live = (try_get(
|
is_live = (try_get(
|
||||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
|
||||||
bool) is True) or '/live.francetv.fr/' in video_url
|
or video.get('is_live') is True
|
||||||
format_id = video['format']
|
or '/live.francetv.fr/' in video_url)
|
||||||
|
format_id = video.get('format')
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
if georestricted:
|
if georestricted:
|
||||||
|
@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
|
||||||
sign(video_url, format_id), video_id, 'mp4',
|
sign(video_url, format_id), video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||||
fatal=False))
|
fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
||||||
elif video_url.startswith('rtmp'):
|
elif video_url.startswith('rtmp'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = info['titre']
|
title = info['titre']
|
||||||
|
@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(title) if is_live else title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'description': clean_html(info['synopsis']),
|
'description': clean_html(info.get('synopsis')),
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
||||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
||||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
|
35
youtube_dl/extractor/fujitv.py
Normal file
35
youtube_dl/extractor/fujitv.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class FujiTVFODPlus7IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
|
||||||
|
_BASE_URL = 'http://i.fod.fujitv.co.jp/'
|
||||||
|
_BITRATE_MAP = {
|
||||||
|
300: (320, 180),
|
||||||
|
800: (640, 360),
|
||||||
|
1200: (1280, 720),
|
||||||
|
2000: (1280, 720),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
|
||||||
|
for f in formats:
|
||||||
|
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
||||||
|
if wh:
|
||||||
|
f.update({
|
||||||
|
'width': wh[0],
|
||||||
|
'height': wh[1],
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
|
||||||
|
}
|
|
@ -1,16 +1,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .once import OnceIE
|
from .once import OnceIE
|
||||||
from ..compat import (
|
from ..compat import compat_urllib_parse_unquote
|
||||||
compat_urllib_parse_unquote,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
unescapeHTML,
|
|
||||||
url_basename,
|
|
||||||
dict_get,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GameSpotIE(OnceIE):
|
class GameSpotIE(OnceIE):
|
||||||
|
@ -24,17 +15,16 @@ class GameSpotIE(OnceIE):
|
||||||
'title': 'Arma 3 - Community Guide: SITREP I',
|
'title': 'Arma 3 - Community Guide: SITREP I',
|
||||||
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
||||||
},
|
},
|
||||||
|
'skip': 'manifest URL give HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
||||||
|
'md5': '173ea87ad762cf5d3bf6163dceb255a6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'gs-2300-6424837',
|
'id': 'gs-2300-6424837',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
||||||
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': True, # m3u8 downloads
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.gamespot.com/videos/embed/6439218/',
|
'url': 'https://www.gamespot.com/videos/embed/6439218/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -49,90 +39,40 @@ class GameSpotIE(OnceIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
data_video_json = self._search_regex(
|
data_video = self._parse_json(self._html_search_regex(
|
||||||
r'data-video=["\'](.*?)["\']', webpage, 'data video')
|
r'data-video=(["\'])({.*?})\1', webpage,
|
||||||
data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
|
'video data', group=2), page_id)
|
||||||
|
title = compat_urllib_parse_unquote(data_video['title'])
|
||||||
streams = data_video['videoStreams']
|
streams = data_video['videoStreams']
|
||||||
|
|
||||||
manifest_url = None
|
|
||||||
formats = []
|
formats = []
|
||||||
f4m_url = streams.get('f4m_stream')
|
|
||||||
if f4m_url:
|
m3u8_url = streams.get('adaptive_stream')
|
||||||
manifest_url = f4m_url
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
|
|
||||||
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
|
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
manifest_url = m3u8_url
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
formats.extend(m3u8_formats)
|
for f in m3u8_formats:
|
||||||
progressive_url = dict_get(
|
formats.append(f)
|
||||||
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
|
http_f = f.copy()
|
||||||
if progressive_url and manifest_url:
|
del http_f['manifest_url']
|
||||||
qualities_basename = self._search_regex(
|
http_f.update({
|
||||||
r'/([^/]+)\.csmil/',
|
'format_id': f['format_id'].replace('hls-', 'http-'),
|
||||||
manifest_url, 'qualities basename', default=None)
|
'protocol': 'http',
|
||||||
if qualities_basename:
|
'url': f['url'].replace('.m3u8', '.mp4'),
|
||||||
QUALITIES_RE = r'((,\d+)+,?)'
|
})
|
||||||
qualities = self._search_regex(
|
formats.append(http_f)
|
||||||
QUALITIES_RE, qualities_basename,
|
|
||||||
'qualities', default=None)
|
|
||||||
if qualities:
|
|
||||||
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
|
|
||||||
qualities.sort()
|
|
||||||
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
|
|
||||||
http_url_basename = url_basename(progressive_url)
|
|
||||||
if m3u8_formats:
|
|
||||||
self._sort_formats(m3u8_formats)
|
|
||||||
m3u8_formats = list(filter(
|
|
||||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
|
||||||
if len(qualities) == len(m3u8_formats):
|
|
||||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
|
||||||
f = m3u8_format.copy()
|
|
||||||
f.update({
|
|
||||||
'url': progressive_url.replace(
|
|
||||||
http_url_basename, http_template % q),
|
|
||||||
'format_id': f['format_id'].replace('hls', 'http'),
|
|
||||||
'protocol': 'http',
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
else:
|
|
||||||
for q in qualities:
|
|
||||||
formats.append({
|
|
||||||
'url': progressive_url.replace(
|
|
||||||
http_url_basename, http_template % q),
|
|
||||||
'ext': 'mp4',
|
|
||||||
'format_id': 'http-%d' % q,
|
|
||||||
'tbr': q,
|
|
||||||
})
|
|
||||||
|
|
||||||
onceux_json = self._search_regex(
|
mpd_url = streams.get('adaptive_dash')
|
||||||
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
|
if mpd_url:
|
||||||
if onceux_json:
|
formats.extend(self._extract_mpd_formats(
|
||||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
mpd_url, page_id, mpd_id='dash', fatal=False))
|
||||||
if onceux_url:
|
|
||||||
formats.extend(self._extract_once_formats(re.sub(
|
|
||||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
|
|
||||||
http_formats_preference=-1))
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
for quality in ['sd', 'hd']:
|
|
||||||
# It's actually a link to a flv file
|
|
||||||
flv_url = streams.get('f4m_{0}'.format(quality))
|
|
||||||
if flv_url is not None:
|
|
||||||
formats.append({
|
|
||||||
'url': flv_url,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': quality,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': data_video['guid'],
|
'id': data_video.get('guid') or page_id,
|
||||||
'display_id': page_id,
|
'display_id': page_id,
|
||||||
'title': compat_urllib_parse_unquote(data_video['title']),
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._html_search_meta('description', webpage),
|
'description': self._html_search_meta('description', webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
|
|
@ -20,19 +20,23 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
int_or_none,
|
||||||
is_html,
|
is_html,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_duration,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
|
url_or_none,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
from .commonprotocols import RtmpIE
|
from .commonprotocols import RtmpIE
|
||||||
from .brightcove import (
|
from .brightcove import (
|
||||||
|
@ -48,7 +52,6 @@ from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .tvc import TVCIE
|
from .tvc import TVCIE
|
||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
from .smotri import SmotriIE
|
|
||||||
from .myvi import MyviIE
|
from .myvi import MyviIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .udn import UDNEmbedIE
|
from .udn import UDNEmbedIE
|
||||||
|
@ -60,6 +63,9 @@ from .tnaflix import TNAFlixNetworkEmbedIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
|
from .mofosex import MofosexEmbedIE
|
||||||
|
from .spankwire import SpankwireIE
|
||||||
|
from .youporn import YouPornIE
|
||||||
from .vimeo import VimeoIE
|
from .vimeo import VimeoIE
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
from .dailymail import DailyMailIE
|
from .dailymail import DailyMailIE
|
||||||
|
@ -88,6 +94,7 @@ from .piksel import PikselIE
|
||||||
from .videa import VideaIE
|
from .videa import VideaIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
|
from .arte import ArteTVEmbedIE
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
from .rutube import RutubeIE
|
from .rutube import RutubeIE
|
||||||
from .limelight import LimelightBaseIE
|
from .limelight import LimelightBaseIE
|
||||||
|
@ -194,11 +201,21 @@ class GenericIE(InfoExtractor):
|
||||||
{
|
{
|
||||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||||
'ext': 'm4v',
|
'title': 'MSNBC Rachel Maddow (video)',
|
||||||
'upload_date': '20150228',
|
'description': 're:.*her unique approach to storytelling.*',
|
||||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
},
|
||||||
}
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'ext': 'mov',
|
||||||
|
'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
|
||||||
|
'title': 're:MSNBC Rachel Maddow',
|
||||||
|
'description': 're:.*her unique approach to storytelling.*',
|
||||||
|
'timestamp': int,
|
||||||
|
'upload_date': compat_str,
|
||||||
|
'duration': float,
|
||||||
|
},
|
||||||
|
}],
|
||||||
},
|
},
|
||||||
# RSS feed with enclosures and unsupported link URLs
|
# RSS feed with enclosures and unsupported link URLs
|
||||||
{
|
{
|
||||||
|
@ -838,7 +855,7 @@ class GenericIE(InfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# MTVSercices embed
|
# MTVServices embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||||
|
@ -1705,6 +1722,15 @@ class GenericIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# multiple kaltura embeds, nsfw
|
||||||
|
'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'kamila-avec-video-jaime-sadomie',
|
||||||
|
'title': "Kamila avec vídeo “J'aime sadomie”",
|
||||||
|
},
|
||||||
|
'playlist_count': 8,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# Non-standard Vimeo embed
|
# Non-standard Vimeo embed
|
||||||
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||||
|
@ -2090,23 +2116,23 @@ class GenericIE(InfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
# {
|
||||||
# Zype embed
|
# # Zype embed
|
||||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
# 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||||
'info_dict': {
|
# 'info_dict': {
|
||||||
'id': '5b400b834b32992a310622b9',
|
# 'id': '5b400b834b32992a310622b9',
|
||||||
'ext': 'mp4',
|
# 'ext': 'mp4',
|
||||||
'title': 'Smoky Barbecue Favorites',
|
# 'title': 'Smoky Barbecue Favorites',
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
# 'thumbnail': r're:^https?://.*\.jpe?g',
|
||||||
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
# 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||||
'upload_date': '20170909',
|
# 'upload_date': '20170909',
|
||||||
'timestamp': 1504915200,
|
# 'timestamp': 1504915200,
|
||||||
},
|
# },
|
||||||
'add_ie': [ZypeIE.ie_key()],
|
# 'add_ie': [ZypeIE.ie_key()],
|
||||||
'params': {
|
# 'params': {
|
||||||
'skip_download': True,
|
# 'skip_download': True,
|
||||||
},
|
# },
|
||||||
},
|
# },
|
||||||
{
|
{
|
||||||
# videojs embed
|
# videojs embed
|
||||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||||
|
@ -2167,6 +2193,10 @@ class GenericIE(InfoExtractor):
|
||||||
playlist_desc_el = doc.find('./channel/description')
|
playlist_desc_el = doc.find('./channel/description')
|
||||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||||
|
|
||||||
|
NS_MAP = {
|
||||||
|
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||||
|
}
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for it in doc.findall('./channel/item'):
|
for it in doc.findall('./channel/item'):
|
||||||
next_url = None
|
next_url = None
|
||||||
|
@ -2182,10 +2212,33 @@ class GenericIE(InfoExtractor):
|
||||||
if not next_url:
|
if not next_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
def itunes(key):
|
||||||
|
return xpath_text(
|
||||||
|
it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
duration = itunes('duration')
|
||||||
|
explicit = itunes('explicit')
|
||||||
|
if explicit == 'true':
|
||||||
|
age_limit = 18
|
||||||
|
elif explicit == 'false':
|
||||||
|
age_limit = 0
|
||||||
|
else:
|
||||||
|
age_limit = None
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': next_url,
|
'url': next_url,
|
||||||
'title': it.find('title').text,
|
'title': it.find('title').text,
|
||||||
|
'description': xpath_text(it, 'description', default=None),
|
||||||
|
'timestamp': unified_timestamp(
|
||||||
|
xpath_text(it, 'pubDate', default=None)),
|
||||||
|
'duration': int_or_none(duration) or parse_duration(duration),
|
||||||
|
'thumbnail': url_or_none(itunes('image')),
|
||||||
|
'episode': itunes('title'),
|
||||||
|
'episode_number': int_or_none(itunes('episode')),
|
||||||
|
'season_number': int_or_none(itunes('season')),
|
||||||
|
'age_limit': age_limit,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -2305,7 +2358,7 @@ class GenericIE(InfoExtractor):
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._generic_title(url),
|
'title': self._generic_title(url),
|
||||||
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
|
@ -2411,7 +2464,9 @@ class GenericIE(InfoExtractor):
|
||||||
# Sometimes embedded video player is hidden behind percent encoding
|
# Sometimes embedded video player is hidden behind percent encoding
|
||||||
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
|
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
|
||||||
# Unescaping the whole page allows to handle those cases in a generic way
|
# Unescaping the whole page allows to handle those cases in a generic way
|
||||||
webpage = compat_urllib_parse_unquote(webpage)
|
# FIXME: unescaping the whole page may break URLs, commenting out for now.
|
||||||
|
# There probably should be a second run of generic extractor on unescaped webpage.
|
||||||
|
# webpage = compat_urllib_parse_unquote(webpage)
|
||||||
|
|
||||||
# Unescape squarespace embeds to be detected by generic extractor,
|
# Unescape squarespace embeds to be detected by generic extractor,
|
||||||
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
||||||
|
@ -2715,6 +2770,21 @@ class GenericIE(InfoExtractor):
|
||||||
if tube8_urls:
|
if tube8_urls:
|
||||||
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
|
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
|
||||||
|
|
||||||
|
# Look for embedded Mofosex player
|
||||||
|
mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
|
||||||
|
if mofosex_urls:
|
||||||
|
return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
|
||||||
|
|
||||||
|
# Look for embedded Spankwire player
|
||||||
|
spankwire_urls = SpankwireIE._extract_urls(webpage)
|
||||||
|
if spankwire_urls:
|
||||||
|
return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
|
||||||
|
|
||||||
|
# Look for embedded YouPorn player
|
||||||
|
youporn_urls = YouPornIE._extract_urls(webpage)
|
||||||
|
if youporn_urls:
|
||||||
|
return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded Tvigle player
|
# Look for embedded Tvigle player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||||
|
@ -2733,11 +2803,9 @@ class GenericIE(InfoExtractor):
|
||||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded arte.tv player
|
# Look for embedded arte.tv player
|
||||||
mobj = re.search(
|
arte_urls = ArteTVEmbedIE._extract_urls(webpage)
|
||||||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
|
if arte_urls:
|
||||||
webpage)
|
return self.playlist_from_matches(arte_urls, video_id, video_title)
|
||||||
if mobj is not None:
|
|
||||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
|
||||||
|
|
||||||
# Look for embedded francetv player
|
# Look for embedded francetv player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
|
@ -2746,11 +2814,6 @@ class GenericIE(InfoExtractor):
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for embedded smotri.com player
|
|
||||||
smotri_url = SmotriIE._extract_url(webpage)
|
|
||||||
if smotri_url:
|
|
||||||
return self.url_result(smotri_url, 'Smotri')
|
|
||||||
|
|
||||||
# Look for embedded Myvi.ru player
|
# Look for embedded Myvi.ru player
|
||||||
myvi_url = MyviIE._extract_url(webpage)
|
myvi_url = MyviIE._extract_url(webpage)
|
||||||
if myvi_url:
|
if myvi_url:
|
||||||
|
@ -2826,9 +2889,12 @@ class GenericIE(InfoExtractor):
|
||||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||||
|
|
||||||
# Look for Kaltura embeds
|
# Look for Kaltura embeds
|
||||||
kaltura_url = KalturaIE._extract_url(webpage)
|
kaltura_urls = KalturaIE._extract_urls(webpage)
|
||||||
if kaltura_url:
|
if kaltura_urls:
|
||||||
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
|
return self.playlist_from_matches(
|
||||||
|
kaltura_urls, video_id, video_title,
|
||||||
|
getter=lambda x: smuggle_url(x, {'source_url': url}),
|
||||||
|
ie=KalturaIE.ie_key())
|
||||||
|
|
||||||
# Look for EaglePlatform embeds
|
# Look for EaglePlatform embeds
|
||||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||||
|
|
|
@ -13,10 +13,10 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class GiantBombIE(InfoExtractor):
|
class GiantBombIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
||||||
'md5': 'c8ea694254a59246a42831155dec57ac',
|
'md5': '132f5a803e7e0ab0e274d84bda1e77ae',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2300-9782',
|
'id': '2300-9782',
|
||||||
'display_id': 'quick-look-destiny-the-dark-below',
|
'display_id': 'quick-look-destiny-the-dark-below',
|
||||||
|
@ -26,7 +26,10 @@ class GiantBombIE(InfoExtractor):
|
||||||
'duration': 2399,
|
'duration': 2399,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
|
@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_parse_qs
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
try_get,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
# video can't be watched anonymously due to view count limit reached,
|
# video can't be watched anonymously due to view count limit reached,
|
||||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||||
'info_dict': {
|
|
||||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
|
||||||
'duration': 189,
|
|
||||||
},
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||||
|
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
video_info = compat_parse_qs(self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
'https://drive.google.com/get_video_info',
|
||||||
|
video_id, query={'docid': video_id}))
|
||||||
|
|
||||||
title = self._search_regex(
|
def get_value(key):
|
||||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
return try_get(video_info, lambda x: x[key][0])
|
||||||
default=None) or self._og_search_title(webpage)
|
|
||||||
duration = int_or_none(self._search_regex(
|
reason = get_value('reason')
|
||||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
title = get_value('title')
|
||||||
default=None))
|
if not title and reason:
|
||||||
|
raise ExtractorError(reason, expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
fmt_stream_map = self._search_regex(
|
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||||
'fmt stream map', default='').split(',')
|
|
||||||
fmt_list = self._search_regex(
|
|
||||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
|
||||||
'fmt_list', default='').split(',')
|
|
||||||
if fmt_stream_map and fmt_list:
|
if fmt_stream_map and fmt_list:
|
||||||
resolutions = {}
|
resolutions = {}
|
||||||
for fmt in fmt_list:
|
for fmt in fmt_list:
|
||||||
|
@ -220,19 +209,27 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'export': 'download',
|
'export': 'download',
|
||||||
})
|
})
|
||||||
urlh = self._request_webpage(
|
|
||||||
source_url, video_id, note='Requesting source file',
|
def request_source_file(source_url, kind):
|
||||||
errnote='Unable to request source file', fatal=False)
|
return self._request_webpage(
|
||||||
|
source_url, video_id, note='Requesting %s file' % kind,
|
||||||
|
errnote='Unable to request %s file' % kind, fatal=False)
|
||||||
|
urlh = request_source_file(source_url, 'source')
|
||||||
if urlh:
|
if urlh:
|
||||||
def add_source_format(src_url):
|
def add_source_format(urlh):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': src_url,
|
# Use redirect URLs as download URLs in order to calculate
|
||||||
|
# correct cookies in _calc_cookies.
|
||||||
|
# Using original URLs may result in redirect loop due to
|
||||||
|
# google.com's cookies mistakenly used for googleusercontent.com
|
||||||
|
# redirect URLs (see #23919).
|
||||||
|
'url': urlh.geturl(),
|
||||||
'ext': determine_ext(title, 'mp4').lower(),
|
'ext': determine_ext(title, 'mp4').lower(),
|
||||||
'format_id': 'source',
|
'format_id': 'source',
|
||||||
'quality': 1,
|
'quality': 1,
|
||||||
})
|
})
|
||||||
if urlh.headers.get('Content-Disposition'):
|
if urlh.headers.get('Content-Disposition'):
|
||||||
add_source_format(source_url)
|
add_source_format(urlh)
|
||||||
else:
|
else:
|
||||||
confirmation_webpage = self._webpage_read_content(
|
confirmation_webpage = self._webpage_read_content(
|
||||||
urlh, url, video_id, note='Downloading confirmation page',
|
urlh, url, video_id, note='Downloading confirmation page',
|
||||||
|
@ -242,23 +239,21 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||||
'confirmation code', fatal=False)
|
'confirmation code', fatal=False)
|
||||||
if confirm:
|
if confirm:
|
||||||
add_source_format(update_url_query(source_url, {
|
confirmed_source_url = update_url_query(source_url, {
|
||||||
'confirm': confirm,
|
'confirm': confirm,
|
||||||
}))
|
})
|
||||||
|
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
||||||
|
if urlh and urlh.headers.get('Content-Disposition'):
|
||||||
|
add_source_format(urlh)
|
||||||
|
|
||||||
if not formats:
|
if not formats and reason:
|
||||||
reason = self._search_regex(
|
raise ExtractorError(reason, expected=True)
|
||||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
|
||||||
if reason:
|
|
||||||
raise ExtractorError(reason, expected=True)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
hl = self._search_regex(
|
hl = get_value('hl')
|
||||||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
|
||||||
subtitles_id = None
|
subtitles_id = None
|
||||||
ttsurl = self._search_regex(
|
ttsurl = get_value('ttsurl')
|
||||||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
|
||||||
if ttsurl:
|
if ttsurl:
|
||||||
# the video Id for subtitles will be the last value in the ttsurl
|
# the video Id for subtitles will be the last value in the ttsurl
|
||||||
# query string
|
# query string
|
||||||
|
@ -268,8 +263,8 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||||
'duration': duration,
|
'duration': int_or_none(get_value('length_seconds')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||||
'automatic_captions': self.extract_automatic_captions(
|
'automatic_captions': self.extract_automatic_captions(
|
||||||
|
|
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class InaIE(InfoExtractor):
|
class InaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
_VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||||
|
@ -31,6 +31,9 @@ class InaIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.ina.fr/video/I12055569',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -58,7 +58,7 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
|
'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
|
||||||
video_id)['data']
|
video_id)['data']
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
|
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||||
|
|
||||||
def _extract_rtmp_video(self, webpage):
|
def _extract_rtmp_video(self, webpage):
|
||||||
# The server URL is hardcoded
|
# The server URL is hardcoded
|
||||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
video_url = 'rtmpe://videof.infoq.com/cfx/st/'
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(
|
encoded_id = self._search_regex(
|
||||||
|
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
|
||||||
return [{
|
return [{
|
||||||
'format_id': 'http_video',
|
'format_id': 'http_video',
|
||||||
'url': http_video_url,
|
'url': http_video_url,
|
||||||
|
'http_headers': {'Referer': 'https://www.infoq.com/'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_http_audio(self, webpage, video_id):
|
def _extract_http_audio(self, webpage, video_id):
|
||||||
fields = self._hidden_inputs(webpage)
|
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||||
http_audio_url = fields.get('filename')
|
http_audio_url = fields.get('filename')
|
||||||
if not http_audio_url:
|
if not http_audio_url:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# base URL is found in the Location header in the response returned by
|
# base URL is found in the Location header in the response returned by
|
||||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||||
|
|
||||||
# audio file seem to be missing some times even if there is a download link
|
# audio file seem to be missing some times even if there is a download link
|
||||||
|
|
|
@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor):
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
'url': 'https://prima.iprima.cz/particka/92-epizoda',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p136534',
|
'id': 'p51388',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Gondíci s. r. o. (34)',
|
'title': 'Partička (92)',
|
||||||
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
|
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # m3u8 download
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://cnn.iprima.cz/videa/70-epizoda',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p681554',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # m3u8 download
|
'skip_download': True, # m3u8 download
|
||||||
|
@ -68,9 +78,16 @@ class IPrimaIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(
|
||||||
|
webpage, default=None) or self._search_regex(
|
||||||
|
r'<h1>([^<]+)', webpage, 'title')
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
|
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
|
||||||
r'data-product="([^"]+)">'),
|
r'data-product="([^"]+)">',
|
||||||
|
r'id=["\']player-(p\d+)"',
|
||||||
|
r'playerId\s*:\s*["\']player-(p\d+)',
|
||||||
|
r'\bvideos\s*=\s*["\'](p\d+)'),
|
||||||
webpage, 'real id')
|
webpage, 'real id')
|
||||||
|
|
||||||
playerpage = self._download_webpage(
|
playerpage = self._download_webpage(
|
||||||
|
@ -125,8 +142,8 @@ class IPrimaIE(InfoExtractor):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._og_search_title(webpage),
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage, default=None),
|
||||||
}
|
}
|
||||||
|
|
|
@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object):
|
||||||
elif function in other_functions:
|
elif function in other_functions:
|
||||||
other_functions[function]()
|
other_functions[function]()
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unknown funcion %s' % function)
|
raise ExtractorError('Unknown function %s' % function)
|
||||||
|
|
||||||
return sdk.target
|
return sdk.target
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unsmuggle_url
|
||||||
|
|
||||||
|
|
||||||
class JWPlatformIE(InfoExtractor):
|
class JWPlatformIE(InfoExtractor):
|
||||||
|
@ -32,10 +33,14 @@ class JWPlatformIE(InfoExtractor):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
|
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
self._initialize_geo_bypass({
|
||||||
|
'countries': smuggled_data.get('geo_countries'),
|
||||||
|
})
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
|
json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
|
||||||
return self._parse_jwplayer_data(json_data, video_id)
|
return self._parse_jwplayer_data(json_data, video_id)
|
||||||
|
|
|
@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
|
urls = KalturaIE._extract_urls(webpage)
|
||||||
|
return urls[0] if urls else None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||||
mobj = (
|
finditer = (
|
||||||
re.search(
|
re.finditer(
|
||||||
r"""(?xs)
|
r"""(?xs)
|
||||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||||
\{.*?
|
\{.*?
|
||||||
|
@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor):
|
||||||
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||||
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||||
""", webpage)
|
""", webpage)
|
||||||
or re.search(
|
or re.finditer(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
(?P<q1>["'])
|
(?P<q1>["'])
|
||||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||||
|
@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor):
|
||||||
)
|
)
|
||||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||||
''', webpage)
|
''', webpage)
|
||||||
or re.search(
|
or re.finditer(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
|
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
|
||||||
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||||
|
@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor):
|
||||||
(?P=q1)
|
(?P=q1)
|
||||||
''', webpage)
|
''', webpage)
|
||||||
)
|
)
|
||||||
if mobj:
|
urls = []
|
||||||
|
for mobj in finditer:
|
||||||
embed_info = mobj.groupdict()
|
embed_info = mobj.groupdict()
|
||||||
for k, v in embed_info.items():
|
for k, v in embed_info.items():
|
||||||
if v:
|
if v:
|
||||||
|
@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor):
|
||||||
webpage)
|
webpage)
|
||||||
if service_mobj:
|
if service_mobj:
|
||||||
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
|
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
|
||||||
return url
|
urls.append(url)
|
||||||
|
return urls
|
||||||
|
|
||||||
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
|
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
|
||||||
params = actions[0]
|
params = actions[0]
|
||||||
|
|
|
@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
|
||||||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||||
description = xpath_text(doc, 'ABSTRACT')
|
description = xpath_text(doc, 'ABSTRACT')
|
||||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||||
|
|
||||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'timestamp': createtion_time,
|
'timestamp': creation_time,
|
||||||
}
|
}
|
||||||
|
|
214
youtube_dl/extractor/lbry.py
Normal file
214
youtube_dl/extractor/lbry.py
Normal file
|
@ -0,0 +1,214 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import functools
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
OnDemandPagedList,
|
||||||
|
try_get,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LBRYBaseIE(InfoExtractor):
|
||||||
|
_BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
|
||||||
|
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
|
||||||
|
_OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
|
||||||
|
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
|
||||||
|
|
||||||
|
def _call_api_proxy(self, method, display_id, params, resource):
|
||||||
|
return self._download_json(
|
||||||
|
'https://api.lbry.tv/api/v1/proxy',
|
||||||
|
display_id, 'Downloading %s JSON metadata' % resource,
|
||||||
|
headers={'Content-Type': 'application/json-rpc'},
|
||||||
|
data=json.dumps({
|
||||||
|
'method': method,
|
||||||
|
'params': params,
|
||||||
|
}).encode())['result']
|
||||||
|
|
||||||
|
def _resolve_url(self, url, display_id, resource):
|
||||||
|
return self._call_api_proxy(
|
||||||
|
'resolve', display_id, {'urls': url}, resource)[url]
|
||||||
|
|
||||||
|
def _permanent_url(self, url, claim_name, claim_id):
|
||||||
|
return urljoin(url, '/%s:%s' % (claim_name, claim_id))
|
||||||
|
|
||||||
|
def _parse_stream(self, stream, url):
|
||||||
|
stream_value = stream.get('value') or {}
|
||||||
|
stream_type = stream_value.get('stream_type')
|
||||||
|
source = stream_value.get('source') or {}
|
||||||
|
media = stream_value.get(stream_type) or {}
|
||||||
|
signing_channel = stream.get('signing_channel') or {}
|
||||||
|
channel_name = signing_channel.get('name')
|
||||||
|
channel_claim_id = signing_channel.get('claim_id')
|
||||||
|
channel_url = None
|
||||||
|
if channel_name and channel_claim_id:
|
||||||
|
channel_url = self._permanent_url(url, channel_name, channel_claim_id)
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||||
|
'description': stream_value.get('description'),
|
||||||
|
'license': stream_value.get('license'),
|
||||||
|
'timestamp': int_or_none(stream.get('timestamp')),
|
||||||
|
'tags': stream_value.get('tags'),
|
||||||
|
'duration': int_or_none(media.get('duration')),
|
||||||
|
'channel': try_get(signing_channel, lambda x: x['value']['title']),
|
||||||
|
'channel_id': channel_claim_id,
|
||||||
|
'channel_url': channel_url,
|
||||||
|
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||||
|
'filesize': int_or_none(source.get('size')),
|
||||||
|
}
|
||||||
|
if stream_type == 'audio':
|
||||||
|
info['vcodec'] = 'none'
|
||||||
|
else:
|
||||||
|
info.update({
|
||||||
|
'width': int_or_none(media.get('width')),
|
||||||
|
'height': int_or_none(media.get('height')),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class LBRYIE(LBRYBaseIE):
|
||||||
|
IE_NAME = 'lbry'
|
||||||
|
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
|
||||||
|
_TESTS = [{
|
||||||
|
# Video
|
||||||
|
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||||
|
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'First day in LBRY? Start HERE!',
|
||||||
|
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||||
|
'timestamp': 1595694354,
|
||||||
|
'upload_date': '20200725',
|
||||||
|
'width': 1280,
|
||||||
|
'height': 720,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Audio
|
||||||
|
'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
|
||||||
|
'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
|
||||||
|
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||||
|
'timestamp': 1591312601,
|
||||||
|
'upload_date': '20200604',
|
||||||
|
'tags': list,
|
||||||
|
'duration': 2570,
|
||||||
|
'channel': 'The LBRY Foundation',
|
||||||
|
'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||||
|
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||||
|
'vcodec': 'none',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://lbry.tv/Episode-1:e7',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://lbry.tv/@LBRYFoundation/Episode-1',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
if display_id.startswith('$/'):
|
||||||
|
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||||
|
else:
|
||||||
|
display_id = display_id.replace(':', '#')
|
||||||
|
uri = 'lbry://' + display_id
|
||||||
|
result = self._resolve_url(uri, display_id, 'stream')
|
||||||
|
result_value = result['value']
|
||||||
|
if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES:
|
||||||
|
raise ExtractorError('Unsupported URL', expected=True)
|
||||||
|
claim_id = result['claim_id']
|
||||||
|
title = result_value['title']
|
||||||
|
streaming_url = self._call_api_proxy(
|
||||||
|
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||||
|
info = self._parse_stream(result, url)
|
||||||
|
info.update({
|
||||||
|
'id': claim_id,
|
||||||
|
'title': title,
|
||||||
|
'url': streaming_url,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class LBRYChannelIE(LBRYBaseIE):
|
||||||
|
IE_NAME = 'lbry:channel'
|
||||||
|
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||||
|
'title': 'The LBRY Foundation',
|
||||||
|
'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
|
||||||
|
},
|
||||||
|
'playlist_count': 29,
|
||||||
|
}, {
|
||||||
|
'url': 'https://lbry.tv/@LBRYFoundation',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 50
|
||||||
|
|
||||||
|
def _fetch_page(self, claim_id, url, page):
|
||||||
|
page += 1
|
||||||
|
result = self._call_api_proxy(
|
||||||
|
'claim_search', claim_id, {
|
||||||
|
'channel_ids': [claim_id],
|
||||||
|
'claim_type': 'stream',
|
||||||
|
'no_totals': True,
|
||||||
|
'page': page,
|
||||||
|
'page_size': self._PAGE_SIZE,
|
||||||
|
'stream_types': self._SUPPORTED_STREAM_TYPES,
|
||||||
|
}, 'page %d' % page)
|
||||||
|
for item in (result.get('items') or []):
|
||||||
|
stream_claim_name = item.get('name')
|
||||||
|
stream_claim_id = item.get('claim_id')
|
||||||
|
if not (stream_claim_name and stream_claim_id):
|
||||||
|
continue
|
||||||
|
|
||||||
|
info = self._parse_stream(item, url)
|
||||||
|
info.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'id': stream_claim_id,
|
||||||
|
'title': try_get(item, lambda x: x['value']['title']),
|
||||||
|
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
|
||||||
|
})
|
||||||
|
yield info
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url).replace(':', '#')
|
||||||
|
result = self._resolve_url(
|
||||||
|
'lbry://' + display_id, display_id, 'channel')
|
||||||
|
claim_id = result['claim_id']
|
||||||
|
entries = OnDemandPagedList(
|
||||||
|
functools.partial(self._fetch_page, claim_id, url),
|
||||||
|
self._PAGE_SIZE)
|
||||||
|
result_value = result.get('value') or {}
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, claim_id, result_value.get('title'),
|
||||||
|
result_value.get('description'))
|
|
@ -5,28 +5,26 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
clean_html,
|
||||||
int_or_none,
|
merge_dicts,
|
||||||
parse_duration,
|
|
||||||
remove_end,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LRTIE(InfoExtractor):
|
class LRTIE(InfoExtractor):
|
||||||
IE_NAME = 'lrt.lt'
|
IE_NAME = 'lrt.lt'
|
||||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
|
||||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
'md5': '85cb2bb530f31d91a9c65b479516ade4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '54391',
|
'id': '2000127261',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Septynios Kauno dienos',
|
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
|
||||||
'description': 'md5:24d84534c7dc76581e59f5689462411a',
|
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
|
||||||
'duration': 1783,
|
'duration': 3035,
|
||||||
'view_count': int,
|
'timestamp': 1604079000,
|
||||||
'like_count': int,
|
'upload_date': '20201030',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# direct mp3 download
|
# direct mp3 download
|
||||||
|
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_js_var(self, webpage, var_name, default):
|
||||||
|
return self._search_regex(
|
||||||
|
r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
|
||||||
|
webpage, var_name.replace('_', ' '), default, group=2)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||||
|
media = self._download_json(self._extract_js_var(
|
||||||
|
webpage, 'media_info_url',
|
||||||
|
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
|
||||||
|
video_id, query={'url': media_url})
|
||||||
|
jw_data = self._parse_jwplayer_data(
|
||||||
|
media['playlist_item'], video_id, base_url=url)
|
||||||
|
|
||||||
formats = []
|
json_ld_data = self._search_json_ld(webpage, video_id)
|
||||||
for _, file_url in re.findall(
|
|
||||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
tags = []
|
||||||
ext = determine_ext(file_url)
|
for tag in (media.get('tags') or []):
|
||||||
if ext not in ('m3u8', 'mp3'):
|
tag_name = tag.get('name')
|
||||||
|
if not tag_name:
|
||||||
continue
|
continue
|
||||||
# mp3 served as m3u8 produces stuttered media file
|
tags.append(tag_name)
|
||||||
if ext == 'm3u8' and '.mp3' in file_url:
|
|
||||||
continue
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
fatal=False))
|
|
||||||
elif ext == 'mp3':
|
|
||||||
formats.append({
|
|
||||||
'url': file_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
clean_info = {
|
||||||
description = self._og_search_description(webpage)
|
'description': clean_html(media.get('content')),
|
||||||
duration = parse_duration(self._search_regex(
|
'tags': tags,
|
||||||
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
|
|
||||||
webpage, 'duration', default=None, group='duration'))
|
|
||||||
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
|
||||||
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
|
|
||||||
webpage, 'view count', fatal=False, group='count'))
|
|
||||||
like_count = int_or_none(self._search_regex(
|
|
||||||
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
|
|
||||||
webpage, 'like count', fatal=False, group='count'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'like_count': like_count,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||||
|
|
|
@ -128,6 +128,12 @@ class MailRuIE(InfoExtractor):
|
||||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||||
video_id, 'Downloading video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
headers = {}
|
||||||
|
|
||||||
|
video_key = self._get_cookies('https://my.mail.ru').get('video_key')
|
||||||
|
if video_key:
|
||||||
|
headers['Cookie'] = 'video_key=%s' % video_key.value
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in video_data['videos']:
|
for f in video_data['videos']:
|
||||||
video_url = f.get('url')
|
video_url = f.get('url')
|
||||||
|
@ -140,6 +146,7 @@ class MailRuIE(InfoExtractor):
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'height': height,
|
'height': height,
|
||||||
|
'http_headers': headers,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,20 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import merge_dicts
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
dict_get,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MallTVIE(InfoExtractor):
|
class MallTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||||
'md5': '1c4a37f080e1f3023103a7b43458e518',
|
'md5': '1c4a37f080e1f3023103a7b43458e518',
|
||||||
|
@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
|
||||||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
|
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
|
||||||
'duration': 216,
|
'duration': 216,
|
||||||
'timestamp': 1538870400,
|
'timestamp': 1538870400,
|
||||||
'upload_date': '20181007',
|
'upload_date': '20181007',
|
||||||
|
@ -26,6 +32,9 @@ class MallTVIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -34,20 +43,46 @@ class MallTVIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, display_id, headers=self.geo_verification_headers())
|
url, display_id, headers=self.geo_verification_headers())
|
||||||
|
|
||||||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
|
video = self._parse_json(self._search_regex(
|
||||||
|
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
|
||||||
|
webpage, 'video object'), display_id)
|
||||||
|
video_source = video['VideoSource']
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
SOURCE_RE, webpage, 'video id', group='id')
|
r'/([\da-z]+)/index\b', video_source, 'video id')
|
||||||
|
|
||||||
media = self._parse_html5_media_entries(
|
formats = self._extract_m3u8_formats(
|
||||||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
|
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
|
||||||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for s in (video.get('Subtitles') or {}):
|
||||||
|
s_url = s.get('Url')
|
||||||
|
if not s_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(s.get('Language') or 'cz', []).append({
|
||||||
|
'url': s_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
entity_counts = video.get('EntityCounts') or {}
|
||||||
|
|
||||||
|
def get_count(k):
|
||||||
|
v = entity_counts.get(k + 's') or {}
|
||||||
|
return int_or_none(dict_get(v, ('Count', 'StrCount')))
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
return merge_dicts(media, info, {
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': self._og_search_title(webpage, default=None) or display_id,
|
'title': video.get('Title'),
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': clean_html(video.get('Description')),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': video.get('ThumbnailUrl'),
|
||||||
})
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
|
||||||
|
'view_count': get_count('View'),
|
||||||
|
'like_count': get_count('Like'),
|
||||||
|
'dislike_count': get_count('Dislike'),
|
||||||
|
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
|
||||||
|
'comment_count': get_count('Comment'),
|
||||||
|
}, info)
|
||||||
|
|
131
youtube_dl/extractor/medaltv.py
Normal file
131
youtube_dl/extractor/medaltv.py
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MedalTVIE(InfoExtractor):
    # Extractor for clips hosted on medal.tv. All metadata is read from the
    # ``hydrationData`` JSON object embedded in the clip page.
    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
        'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
        'info_dict': {
            'id': '34934644',
            'ext': 'mp4',
            'title': 'Quad Cold',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'MowgliSB',
            'timestamp': 1603165266,
            'upload_date': '20201020',
            # uploader_id is passed through str_or_none(), hence a string
            'uploader_id': '10619174',
        }
    }, {
        'url': 'https://medal.tv/clips/36787208',
        'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
        'info_dict': {
            'id': '36787208',
            'ext': 'mp4',
            'title': 'u tk me i tk u bigger',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'Mimicc',
            'timestamp': 1605580939,
            'upload_date': '20201117',
            # uploader_id is passed through str_or_none(), hence a string
            'uploader_id': '5156321',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single medal.tv clip.

        Raises ExtractorError when the page carries no data for the clip or
        when the clip reports an error and no format could be collected.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        hydration_data = self._parse_json(self._search_regex(
            r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
            webpage, 'hydration data', default='{}'), video_id)

        clip = try_get(
            hydration_data, lambda x: x['clips'][video_id], dict) or {}
        if not clip:
            raise ExtractorError(
                'Could not find video information.', video_id=video_id)

        title = clip['contentTitle']

        source_width = int_or_none(clip.get('sourceWidth'))
        source_height = int_or_none(clip.get('sourceHeight'))

        # Force float arithmetic: this module does not import ``division``
        # from __future__, so on Python 2 ``16 / 9`` would evaluate to 1.
        if source_width and source_height:
            aspect_ratio = float(source_width) / source_height
        else:
            aspect_ratio = 16.0 / 9

        def add_item(container, item_url, height, id_key='format_id', item_id=None):
            # Append a format/thumbnail entry for item_url to container.
            # The entry is only kept when its id occurs in the URL.
            if not item_id:
                if height is None:
                    # Neither an explicit id nor a height to derive one from
                    return
                item_id = '%dp' % height
            if item_id not in item_url:
                return
            item = {
                'url': item_url,
                id_key: item_id,
            }
            if height is not None:
                # Dimensions are only known when a height is available
                item.update({
                    'width': int(round(aspect_ratio * height)),
                    'height': height,
                })
            container.append(item)

        formats = []
        thumbnails = []
        for k, v in clip.items():
            if not (v and isinstance(v, compat_str)):
                continue
            mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
            if not mobj:
                continue
            prefix = mobj.group(1)
            height = int_or_none(mobj.group(2))
            if prefix == 'contentUrl':
                # A key without a height suffix denotes the source rendition
                add_item(
                    formats, v, height or source_height,
                    item_id=None if height else 'source')
            elif prefix == 'thumbnail':
                add_item(thumbnails, v, height, 'id')

        error = clip.get('error')
        if not formats and error:
            if error == 404:
                raise ExtractorError(
                    'That clip does not exist.',
                    expected=True, video_id=video_id)
            else:
                raise ExtractorError(
                    'An unknown error occurred ({0}).'.format(error),
                    video_id=video_id)

        self._sort_formats(formats)

        # Necessary because the id of the author is not known in advance.
        # Won't raise an issue if no profile can be found as this is optional.
        author = try_get(
            hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
        author_id = str_or_none(author.get('id'))
        author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clip.get('contentDescription'),
            'uploader': author.get('displayName'),
            # 'created' is divided by 1000 to yield the timestamp
            'timestamp': float_or_none(clip.get('created'), 1000),
            'uploader_id': author_id,
            'uploader_url': author_url,
            'duration': int_or_none(clip.get('videoLengthSeconds')),
            'view_count': int_or_none(clip.get('views')),
            'like_count': int_or_none(clip.get('likes')),
            'comment_count': int_or_none(clip.get('comments')),
        }
|
|
@ -23,7 +23,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||||
https?://
|
https?://
|
||||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||||
(?:
|
(?:
|
||||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
|
||||||
player/index\.html\?.*?\bprogramGuid=
|
player/index\.html\?.*?\bprogramGuid=
|
||||||
)
|
)
|
||||||
)(?P<id>[0-9A-Z]{16,})
|
)(?P<id>[0-9A-Z]{16,})
|
||||||
|
@ -88,6 +88,9 @@ class MediasetIE(ThePlatformBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -17,9 +17,8 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class MGTVIE(InfoExtractor):
|
class MGTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
IE_DESC = '芒果TV'
|
IE_DESC = '芒果TV'
|
||||||
_GEO_COUNTRIES = ['CN']
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||||
|
@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://w.mgtv.com/b/301817/3826653.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
|
||||||
try:
|
try:
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
||||||
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor):
|
||||||
stream_data = self._download_json(
|
stream_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
||||||
'pm2': api_data['atc']['pm2'],
|
'pm2': api_data['atc']['pm2'],
|
||||||
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
stream_domain = stream_data['stream_domain'][0]
|
stream_domain = stream_data['stream_domain'][0]
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
@ -54,3 +57,23 @@ class MofosexIE(KeezMoviesIE):
|
||||||
})
|
})
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class MofosexEmbedIE(InfoExtractor):
    # Handles mofosex.com embed URLs by delegating to the regular extractor.
    _VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src value of every mofosex embed iframe in webpage."""
        embed_re = r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)'
        return re.findall(embed_re, webpage)

    def _real_extract(self, url):
        """Redirect the embed URL to the matching video page URL."""
        video_id = self._match_id(url)
        page_url = 'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id)
        return self.url_result(
            page_url, ie=MofosexIE.ie_key(), video_id=video_id)
|
||||||
|
|
|
@ -26,7 +26,7 @@ class MotherlessIE(InfoExtractor):
|
||||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||||
'upload_date': '20100913',
|
'upload_date': '20100913',
|
||||||
'uploader_id': 'famouslyfuckedup',
|
'uploader_id': 'famouslyfuckedup',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
@ -40,7 +40,7 @@ class MotherlessIE(InfoExtractor):
|
||||||
'game', 'hairy'],
|
'game', 'hairy'],
|
||||||
'upload_date': '20140622',
|
'upload_date': '20140622',
|
||||||
'uploader_id': 'Sulivana7x',
|
'uploader_id': 'Sulivana7x',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'skip': '404',
|
'skip': '404',
|
||||||
|
@ -54,7 +54,7 @@ class MotherlessIE(InfoExtractor):
|
||||||
'categories': ['superheroine heroine superher'],
|
'categories': ['superheroine heroine superher'],
|
||||||
'upload_date': '20140827',
|
'upload_date': '20140827',
|
||||||
'uploader_id': 'shade0230',
|
'uploader_id': 'shade0230',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
@ -76,7 +76,8 @@ class MotherlessIE(InfoExtractor):
|
||||||
raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
|
raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
(r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
|
||||||
|
r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
|
||||||
video_url = (self._html_search_regex(
|
video_url = (self._html_search_regex(
|
||||||
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||||
|
@ -84,14 +85,15 @@ class MotherlessIE(InfoExtractor):
|
||||||
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
view_count = str_to_int(self._html_search_regex(
|
view_count = str_to_int(self._html_search_regex(
|
||||||
r'<strong>Views</strong>\s+([^<]+)<',
|
(r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
like_count = str_to_int(self._html_search_regex(
|
like_count = str_to_int(self._html_search_regex(
|
||||||
r'<strong>Favorited</strong>\s+([^<]+)<',
|
(r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
|
|
||||||
upload_date = self._html_search_regex(
|
upload_date = self._html_search_regex(
|
||||||
r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
|
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
|
||||||
|
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
|
||||||
if 'Ago' in upload_date:
|
if 'Ago' in upload_date:
|
||||||
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
||||||
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
||||||
|
|
|
@ -349,6 +349,18 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
def extract_child_with_type(parent, t):
    """Return the first entry of parent['children'] whose 'type' equals t.

    Raises StopIteration when no child has the requested type.
    """
    for child in parent['children']:
        if child.get('type') == t:
            return child
    raise StopIteration
|
||||||
|
|
||||||
|
def _extract_mgid(self, webpage):
    """Return the media URI stored in the page's ``__DATA__`` JSON object.

    The URI is read from the 'VideoPlayer' child of the 'MainContainer'
    child of the parsed data.
    """
    raw_data = self._search_regex(
        r'__DATA__\s*=\s*({.+?});', webpage, 'data')
    node = self._parse_json(raw_data, None)
    # Descend one level per expected component type
    for child_type in ('MainContainer', 'VideoPlayer'):
        node = self.extract_child_with_type(node, child_type)
    return node['props']['media']['video']['config']['uri']
|
||||||
|
|
||||||
|
|
||||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'mtvjapan'
|
IE_NAME = 'mtvjapan'
|
||||||
|
|
|
@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -394,8 +393,8 @@ class NBCNewsIE(ThePlatformIE):
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data = self._parse_json(self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'window\.__data\s*=\s*({.+});', webpage,
|
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||||
'bootstrap json'), video_id, js_to_json)
|
webpage, 'bootstrap json'), video_id)['props']['initialState']
|
||||||
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
||||||
if not video_data:
|
if not video_data:
|
||||||
video_data = data['article']['content'][0]['primaryMedia']['video']
|
video_data = data['article']['content'][0]['primaryMedia']['video']
|
||||||
|
|
|
@ -81,6 +81,29 @@ class NDRIE(NDRBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# with subtitles
|
||||||
|
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'extra18674',
|
||||||
|
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||||
|
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'upload_date': '20201113',
|
||||||
|
'duration': 1749,
|
||||||
|
'subtitles': {
|
||||||
|
'de': [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
'url': r're:^https://www\.ndr\.de.+',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -239,6 +262,20 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||||
'preference': quality_key(thumbnail.get('quality')),
|
'preference': quality_key(thumbnail.get('quality')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = config.get('tracks')
|
||||||
|
if tracks and isinstance(tracks, list):
|
||||||
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
|
track_url = urljoin(url, track.get('src'))
|
||||||
|
if not track_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(track.get('srclang') or 'de', []).append({
|
||||||
|
'url': track_url,
|
||||||
|
'ext': 'ttml',
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -248,6 +285,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ class NhkVodIE(InfoExtractor):
|
||||||
# Content available only for a limited period of time. Visit
|
# Content available only for a limited period of time. Visit
|
||||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# clip
|
# video clip
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -21,6 +21,19 @@ class NhkVodIE(InfoExtractor):
|
||||||
'timestamp': 1565965194,
|
'timestamp': 1565965194,
|
||||||
'upload_date': '20190816',
|
'upload_date': '20190816',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# audio clip
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'r_inventions-20201104-1-en',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': "Japan's Top Inventions - Miniature Video Cameras",
|
||||||
|
'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -6,6 +6,7 @@ import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
qualities,
|
qualities,
|
||||||
|
@ -33,42 +34,76 @@ class NovaEmbedIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
bitrates = self._parse_json(
|
duration = None
|
||||||
self._search_regex(
|
|
||||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
|
||||||
video_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
|
||||||
quality_key = qualities(QUALITIES)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_list in bitrates.items():
|
|
||||||
if not isinstance(format_list, list):
|
player = self._parse_json(
|
||||||
format_list = [format_list]
|
self._search_regex(
|
||||||
for format_url in format_list:
|
r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
|
||||||
format_url = url_or_none(format_url)
|
webpage, 'player', default='{}'), video_id, fatal=False)
|
||||||
if not format_url:
|
if player:
|
||||||
continue
|
for format_id, format_list in player['tracks'].items():
|
||||||
if format_id == 'hls':
|
if not isinstance(format_list, list):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
format_list = [format_list]
|
||||||
format_url, video_id, ext='mp4',
|
for format_dict in format_list:
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
if not isinstance(format_dict, dict):
|
||||||
fatal=False))
|
continue
|
||||||
continue
|
format_url = url_or_none(format_dict.get('src'))
|
||||||
f = {
|
format_type = format_dict.get('type')
|
||||||
'url': format_url,
|
ext = determine_ext(format_url)
|
||||||
}
|
if (format_type == 'application/x-mpegURL'
|
||||||
f_id = format_id
|
or format_id == 'HLS' or ext == 'm3u8'):
|
||||||
for quality in QUALITIES:
|
formats.extend(self._extract_m3u8_formats(
|
||||||
if '%s.mp4' % quality in format_url:
|
format_url, video_id, 'mp4',
|
||||||
f_id += '-%s' % quality
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
f.update({
|
fatal=False))
|
||||||
'quality': quality_key(quality),
|
elif (format_type == 'application/dash+xml'
|
||||||
'format_note': quality.upper(),
|
or format_id == 'DASH' or ext == 'mpd'):
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
})
|
})
|
||||||
break
|
duration = int_or_none(player.get('duration'))
|
||||||
f['format_id'] = f_id
|
else:
|
||||||
formats.append(f)
|
# Old path, not actual as of 08.04.2020
|
||||||
|
bitrates = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||||
|
video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||||
|
quality_key = qualities(QUALITIES)
|
||||||
|
|
||||||
|
for format_id, format_list in bitrates.items():
|
||||||
|
if not isinstance(format_list, list):
|
||||||
|
format_list = [format_list]
|
||||||
|
for format_url in format_list:
|
||||||
|
format_url = url_or_none(format_url)
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
if format_id == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, ext='mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
|
fatal=False))
|
||||||
|
continue
|
||||||
|
f = {
|
||||||
|
'url': format_url,
|
||||||
|
}
|
||||||
|
f_id = format_id
|
||||||
|
for quality in QUALITIES:
|
||||||
|
if '%s.mp4' % quality in format_url:
|
||||||
|
f_id += '-%s' % quality
|
||||||
|
f.update({
|
||||||
|
'quality': quality_key(quality),
|
||||||
|
'format_note': quality.upper(),
|
||||||
|
})
|
||||||
|
break
|
||||||
|
f['format_id'] = f_id
|
||||||
|
formats.append(f)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._og_search_title(
|
title = self._og_search_title(
|
||||||
|
@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
|
||||||
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||||
'thumbnail', fatal=False, group='value')
|
'thumbnail', fatal=False, group='value')
|
||||||
duration = int_or_none(self._search_regex(
|
duration = int_or_none(self._search_regex(
|
||||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
|
||||||
|
default=duration))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
# mutlimedia, not media title
|
# multimedia, not media title
|
||||||
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '533198237',
|
'id': '533198237',
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -9,25 +11,289 @@ from ..compat import (
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
JSON_LD_RE,
|
|
||||||
js_to_json,
|
|
||||||
NO_DEFAULT,
|
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
try_get,
|
try_get,
|
||||||
|
urljoin,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NRKBaseIE(InfoExtractor):
|
class NRKBaseIE(InfoExtractor):
|
||||||
_GEO_COUNTRIES = ['NO']
|
_GEO_COUNTRIES = ['NO']
|
||||||
|
_CDN_REPL_REGEX = r'''(?x)://
|
||||||
|
(?:
|
||||||
|
nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
|
||||||
|
nrk-od-no\.telenorcdn\.net|
|
||||||
|
minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
|
||||||
|
)/'''
|
||||||
|
|
||||||
_api_host = None
|
def _extract_nrk_formats(self, asset_url, video_id):
    """Extract formats for asset_url.

    akamaihd.net URLs are handed to the Akamai extractor. Any other URL is
    treated as an m3u8 manifest; if that yields nothing and the URL matches
    _CDN_REPL_REGEX, one retry is made against a randomly numbered
    nrk-od-NN.akamaized.net host.
    """
    if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
        akamai_url = re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url)
        return self._extract_akamai_formats(akamai_url, video_id)

    def m3u8_formats(manifest_url):
        return self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', 'm3u8_native', fatal=False)

    manifest_url = re.sub(
        r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
    formats = m3u8_formats(manifest_url)
    if formats or not re.search(self._CDN_REPL_REGEX, manifest_url):
        return formats

    # Nothing extracted: retry with the CDN part of the URL swapped out
    replacement = '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99)
    return m3u8_formats(
        re.sub(self._CDN_REPL_REGEX, replacement, manifest_url))
|
||||||
|
|
||||||
|
def _raise_error(self, data):
    """Raise the error described by data.

    Reports a geo restriction when data indicates geo blocking; otherwise
    raises an expected ExtractorError carrying the end-user message, a
    known message for the message type, or the message type itself.
    """
    MESSAGES = {
        'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
        'ProgramRightsHasExpired': 'Programmet har gått ut',
        'NoProgramRights': 'Ikke tilgjengelig',
        'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
    }
    message_type = data.get('messageType', '')

    # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
    geo_blocked = 'IsGeoBlocked' in message_type
    if not geo_blocked:
        geo_blocked = try_get(
            data, lambda x: x['usageRights']['isGeoBlocked']) is True
    if geo_blocked:
        self.raise_geo_restricted(
            msg=MESSAGES['ProgramIsGeoBlocked'],
            countries=self._GEO_COUNTRIES)

    message = data.get('endUserMessage')
    if not message:
        message = MESSAGES.get(message_type, message_type)
    raise ExtractorError(
        '%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
    """Download JSON from path resolved against http://psapi.nrk.no/.

    When note is empty, the progress note is built from item.
    """
    api_url = urljoin('http://psapi.nrk.no/', path)
    if not note:
        note = 'Downloading %s JSON' % item
    return self._download_json(
        api_url, video_id, note, fatal=fatal, query=query)
|
||||||
|
|
||||||
|
|
||||||
|
class NRKIE(NRKBaseIE):
    # Extractor for nrk.no video pages, v8 psapi mediaelement URLs and
    # internal ``nrk:<id>`` references.
    _VALID_URL = r'''(?x)
                        (?:
                            nrk:|
                            https?://
                                (?:
                                    (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
                                    v8[-.]psapi\.nrk\.no/mediaelement/
                                )
                            )
                            (?P<id>[^?\#&]+)
                        '''

    _TESTS = [{
        # video
        'url': 'http://www.nrk.no/video/PS*150533',
        'md5': 'f46be075326e23ad0e524edfcb06aeb6',
        'info_dict': {
            'id': '150533',
            'ext': 'mp4',
            'title': 'Dompap og andre fugler i Piip-Show',
            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
            'duration': 262,
        }
    }, {
        # audio
        'url': 'http://www.nrk.no/video/PS*154915',
        # MD5 is unstable
        'info_dict': {
            'id': '154915',
            'ext': 'mp4',
            'title': 'Slik høres internett ut når du er blind',
            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
            'duration': 20,
        }
    }, {
        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
        'only_matching': True,
    }, {
        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
        'only_matching': True,
    }, {
        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
        'only_matching': True,
    }, {
        'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
        'only_matching': True,
    }, {
        'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
        'only_matching': True,
    }]

    def _extract_from_playback(self, video_id):
        """Build the info dict from the playback 'manifest' and 'metadata' API items."""
        path_templ = 'playback/%s/' + video_id

        def fetch(item, query=None):
            return self._call_api(path_templ % item, video_id, item, query=query)

        # known values for preferredCdn: akamai, iponly, minicdn and telenor
        manifest = fetch('manifest', {'preferredCdn': 'akamai'})

        if manifest.get('playability') == 'nonPlayable':
            self._raise_error(manifest['nonPlayable'])

        playable = manifest['playable']

        formats = []
        for asset in playable['assets']:
            # Ignore malformed and encrypted assets
            if not isinstance(asset, dict) or asset.get('encrypted'):
                continue
            asset_url = url_or_none(asset.get('url'))
            if not asset_url:
                continue
            is_hls = (
                asset.get('format') == 'HLS'
                or determine_ext(asset_url) == 'm3u8')
            if is_hls:
                formats.extend(self._extract_nrk_formats(asset_url, video_id))
        self._sort_formats(formats)

        data = fetch('metadata')

        preplay = data['preplay']
        titles = preplay['titles']
        title = titles['title']

        duration = (
            parse_duration(playable.get('duration'))
            or parse_duration(data.get('duration')))

        images = try_get(
            preplay, lambda x: x['poster']['images'], list) or []
        thumbnails = []
        for image in images:
            image_url = (
                url_or_none(image.get('url'))
                if isinstance(image, dict) else None)
            if image_url:
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('pixelWidth')),
                    'height': int_or_none(image.get('pixelHeight')),
                })

        return {
            'id': video_id,
            'title': title,
            'alt_title': titles.get('subtitle'),
            'description': preplay.get('description'),
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract the id from url and delegate to the playback API."""
        return self._extract_from_playback(self._match_id(url))
|
||||||
|
|
||||||
|
|
||||||
|
class NRKTVIE(NRKBaseIE):
|
||||||
|
IE_DESC = 'NRK TV and NRK Radio'
|
||||||
|
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||||
|
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
|
||||||
|
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||||
|
'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MDDP12000117AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Alarm Trolltunga',
|
||||||
|
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||||
|
'duration': 2223.44,
|
||||||
|
'age_limit': 6,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
|
'md5': '8d40dab61cea8ab0114e090b029a0565',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MUHH48000314AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '20 spørsmål 23.05.2014',
|
||||||
|
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||||
|
'duration': 1741,
|
||||||
|
'series': '20 spørsmål',
|
||||||
|
'episode': '23.05.2014',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MDFP15000514CA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||||
|
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||||
|
'duration': 4605.08,
|
||||||
|
'series': 'Kunnskapskanalen',
|
||||||
|
'episode': '24.05.2014',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# single playlist video
|
||||||
|
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515AH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||||
|
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
|
'skip': 'particular part is not supported currently',
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515AH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||||
|
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'KMTE50001317AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Anno 13:30',
|
||||||
|
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||||
|
'duration': 2340,
|
||||||
|
'series': 'Anno',
|
||||||
|
'episode': '13:30',
|
||||||
|
'season_number': 3,
|
||||||
|
'episode_number': 13,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MUHH46000317AA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nytt på Nytt 27.01.2017',
|
||||||
|
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||||
|
'duration': 1796,
|
||||||
|
'series': 'Nytt på nytt',
|
||||||
|
'episode': '27.01.2017',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'ProgramRightsHasExpired',
|
||||||
|
}, {
|
||||||
|
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_api_host = None
|
||||||
|
|
||||||
|
def _extract_from_mediaelement(self, video_id):
|
||||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||||
|
|
||||||
for api_host in api_hosts:
|
for api_host in api_hosts:
|
||||||
|
@ -43,6 +309,7 @@ class NRKBaseIE(InfoExtractor):
|
||||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||||
video_id = data.get('id') or video_id
|
video_id = data.get('id') or video_id
|
||||||
|
|
||||||
|
urls = []
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
conviva = data.get('convivaStatistics') or {}
|
conviva = data.get('convivaStatistics') or {}
|
||||||
|
@ -59,19 +326,14 @@ class NRKBaseIE(InfoExtractor):
|
||||||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
||||||
for num, asset in enumerate(media_assets, 1):
|
for num, asset in enumerate(media_assets, 1):
|
||||||
asset_url = asset.get('url')
|
asset_url = asset.get('url')
|
||||||
if not asset_url:
|
if not asset_url or asset_url in urls:
|
||||||
continue
|
continue
|
||||||
formats = self._extract_akamai_formats(asset_url, video_id)
|
urls.append(asset_url)
|
||||||
|
formats = self._extract_nrk_formats(asset_url, video_id)
|
||||||
if not formats:
|
if not formats:
|
||||||
continue
|
continue
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
# Some f4m streams may not work with hdcore in fragments' URLs
|
|
||||||
for f in formats:
|
|
||||||
extra_param = f.get('extra_param_to_segment_url')
|
|
||||||
if extra_param and 'hdcore' in extra_param:
|
|
||||||
del f['extra_param_to_segment_url']
|
|
||||||
|
|
||||||
entry_id, entry_title = video_id_and_title(num)
|
entry_id, entry_title = video_id_and_title(num)
|
||||||
duration = parse_duration(asset.get('duration'))
|
duration = parse_duration(asset.get('duration'))
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -87,38 +349,26 @@ class NRKBaseIE(InfoExtractor):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'is_live': live,
|
||||||
})
|
})
|
||||||
|
|
||||||
if not entries:
|
if not entries:
|
||||||
media_url = data.get('mediaUrl')
|
media_url = data.get('mediaUrl')
|
||||||
if media_url:
|
if media_url and media_url not in urls:
|
||||||
formats = self._extract_akamai_formats(media_url, video_id)
|
formats = self._extract_nrk_formats(media_url, video_id)
|
||||||
self._sort_formats(formats)
|
if formats:
|
||||||
duration = parse_duration(data.get('duration'))
|
self._sort_formats(formats)
|
||||||
entries = [{
|
duration = parse_duration(data.get('duration'))
|
||||||
'id': video_id,
|
entries = [{
|
||||||
'title': make_title(title),
|
'id': video_id,
|
||||||
'duration': duration,
|
'title': make_title(title),
|
||||||
'formats': formats,
|
'duration': duration,
|
||||||
}]
|
'formats': formats,
|
||||||
|
'is_live': live,
|
||||||
|
}]
|
||||||
|
|
||||||
if not entries:
|
if not entries:
|
||||||
MESSAGES = {
|
self._raise_error(data)
|
||||||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
|
|
||||||
'ProgramRightsHasExpired': 'Programmet har gått ut',
|
|
||||||
'NoProgramRights': 'Ikke tilgjengelig',
|
|
||||||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
|
||||||
}
|
|
||||||
message_type = data.get('messageType', '')
|
|
||||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
|
||||||
if 'IsGeoBlocked' in message_type:
|
|
||||||
self.raise_geo_restricted(
|
|
||||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
|
||||||
countries=self._GEO_COUNTRIES)
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
|
|
||||||
message_type, message_type)),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||||
|
@ -196,190 +446,9 @@ class NRKBaseIE(InfoExtractor):
|
||||||
|
|
||||||
return self.playlist_result(entries, video_id, title, description)
|
return self.playlist_result(entries, video_id, title, description)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
class NRKIE(NRKBaseIE):
|
video_id = self._match_id(url)
|
||||||
_VALID_URL = r'''(?x)
|
return self._extract_from_mediaelement(video_id)
|
||||||
(?:
|
|
||||||
nrk:|
|
|
||||||
https?://
|
|
||||||
(?:
|
|
||||||
(?:www\.)?nrk\.no/video/PS\*|
|
|
||||||
v8[-.]psapi\.nrk\.no/mediaelement/
|
|
||||||
)
|
|
||||||
)
|
|
||||||
(?P<id>[^?#&]+)
|
|
||||||
'''
|
|
||||||
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
|
|
||||||
_TESTS = [{
|
|
||||||
# video
|
|
||||||
'url': 'http://www.nrk.no/video/PS*150533',
|
|
||||||
'md5': '706f34cdf1322577589e369e522b50ef',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '150533',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Dompap og andre fugler i Piip-Show',
|
|
||||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
|
||||||
'duration': 262,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# audio
|
|
||||||
'url': 'http://www.nrk.no/video/PS*154915',
|
|
||||||
# MD5 is unstable
|
|
||||||
'info_dict': {
|
|
||||||
'id': '154915',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Slik høres internett ut når du er blind',
|
|
||||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
|
||||||
'duration': 20,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
|
|
||||||
class NRKTVIE(NRKBaseIE):
|
|
||||||
IE_DESC = 'NRK TV and NRK Radio'
|
|
||||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https?://
|
|
||||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
|
||||||
(?:serie(?:/[^/]+){1,2}|program)/
|
|
||||||
(?![Ee]pisodes)%s
|
|
||||||
(?:/\d{2}-\d{2}-\d{4})?
|
|
||||||
(?:\#del=(?P<part_id>\d+))?
|
|
||||||
''' % _EPISODE_RE
|
|
||||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
|
||||||
'md5': '8270824df46ec629b66aeaa5796b36fb',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MDDP12000117AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Alarm Trolltunga',
|
|
||||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
|
||||||
'duration': 2223,
|
|
||||||
'age_limit': 6,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
|
||||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MUHH48000314AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '20 spørsmål 23.05.2014',
|
|
||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
|
||||||
'duration': 1741,
|
|
||||||
'series': '20 spørsmål',
|
|
||||||
'episode': '23.05.2014',
|
|
||||||
},
|
|
||||||
'skip': 'NoProgramRights',
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MDFP15000514CA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
|
||||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
|
||||||
'duration': 4605,
|
|
||||||
'series': 'Kunnskapskanalen',
|
|
||||||
'episode': '24.05.2014',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# single playlist video
|
|
||||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515-part2',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
|
||||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'expected_warnings': ['Video is geo restricted'],
|
|
||||||
'skip': 'particular part is not supported currently',
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
|
||||||
'playlist': [{
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515AH',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
|
||||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
|
||||||
'duration': 772,
|
|
||||||
'series': 'Tour de Ski',
|
|
||||||
'episode': '06.01.2015',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515BH',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
|
||||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
|
||||||
'duration': 6175,
|
|
||||||
'series': 'Tour de Ski',
|
|
||||||
'episode': '06.01.2015',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}],
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MSPO40010515',
|
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
|
||||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
|
||||||
},
|
|
||||||
'expected_warnings': ['Video is geo restricted'],
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'KMTE50001317AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Anno 13:30',
|
|
||||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
|
||||||
'duration': 2340,
|
|
||||||
'series': 'Anno',
|
|
||||||
'episode': '13:30',
|
|
||||||
'season_number': 3,
|
|
||||||
'episode_number': 13,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'MUHH46000317AA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Nytt på Nytt 27.01.2017',
|
|
||||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
|
||||||
'duration': 1796,
|
|
||||||
'series': 'Nytt på nytt',
|
|
||||||
'episode': '27.01.2017',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
|
|
||||||
class NRKTVEpisodeIE(InfoExtractor):
|
class NRKTVEpisodeIE(InfoExtractor):
|
||||||
|
@ -425,66 +494,114 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
nrk_id = self._parse_json(
|
info = self._search_json_ld(webpage, display_id, default={})
|
||||||
self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
|
nrk_id = info.get('@id') or self._html_search_meta(
|
||||||
display_id)['@id']
|
'nrk:program-id', webpage, default=None) or self._search_regex(
|
||||||
|
r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage,
|
||||||
|
'nrk id')
|
||||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
||||||
return self.url_result(
|
|
||||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'id': nrk_id,
|
||||||
|
'url': 'nrk:%s' % nrk_id,
|
||||||
|
'ie_key': NRKIE.ie_key(),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class NRKTVSerieBaseIE(InfoExtractor):
|
class NRKTVSerieBaseIE(NRKBaseIE):
|
||||||
def _extract_series(self, webpage, display_id, fatal=True):
|
|
||||||
config = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
|
|
||||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
|
|
||||||
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
|
|
||||||
display_id, fatal=False, transform_source=js_to_json)
|
|
||||||
if not config:
|
|
||||||
return
|
|
||||||
return try_get(
|
|
||||||
config,
|
|
||||||
(lambda x: x['initialState']['series'], lambda x: x['series']),
|
|
||||||
dict)
|
|
||||||
|
|
||||||
def _extract_seasons(self, seasons):
|
|
||||||
if not isinstance(seasons, list):
|
|
||||||
return []
|
|
||||||
entries = []
|
|
||||||
for season in seasons:
|
|
||||||
entries.extend(self._extract_episodes(season))
|
|
||||||
return entries
|
|
||||||
|
|
||||||
def _extract_episodes(self, season):
|
|
||||||
if not isinstance(season, dict):
|
|
||||||
return []
|
|
||||||
return self._extract_entries(season.get('episodes'))
|
|
||||||
|
|
||||||
def _extract_entries(self, entry_list):
|
def _extract_entries(self, entry_list):
|
||||||
if not isinstance(entry_list, list):
|
if not isinstance(entry_list, list):
|
||||||
return []
|
return []
|
||||||
entries = []
|
entries = []
|
||||||
for episode in entry_list:
|
for episode in entry_list:
|
||||||
nrk_id = episode.get('prfId')
|
nrk_id = episode.get('prfId') or episode.get('episodeId')
|
||||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||||
continue
|
continue
|
||||||
|
if not re.match(NRKTVIE._EPISODE_RE, nrk_id):
|
||||||
|
continue
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
_ASSETS_KEYS = ('episodes', 'instalments',)
|
||||||
|
|
||||||
|
def _extract_assets_key(self, embedded):
|
||||||
|
for asset_key in self._ASSETS_KEYS:
|
||||||
|
if embedded.get(asset_key):
|
||||||
|
return asset_key
|
||||||
|
|
||||||
|
def _entries(self, data, display_id):
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
embedded = data.get('_embedded') or data
|
||||||
|
if not isinstance(embedded, dict):
|
||||||
|
break
|
||||||
|
assets_key = self._extract_assets_key(embedded)
|
||||||
|
if not assets_key:
|
||||||
|
break
|
||||||
|
# Extract entries
|
||||||
|
entries = try_get(
|
||||||
|
embedded,
|
||||||
|
(lambda x: x[assets_key]['_embedded'][assets_key],
|
||||||
|
lambda x: x[assets_key]),
|
||||||
|
list)
|
||||||
|
for e in self._extract_entries(entries):
|
||||||
|
yield e
|
||||||
|
# Find next URL
|
||||||
|
next_url_path = try_get(
|
||||||
|
data,
|
||||||
|
(lambda x: x['_links']['next']['href'],
|
||||||
|
lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
|
||||||
|
compat_str)
|
||||||
|
if not next_url_path:
|
||||||
|
break
|
||||||
|
data = self._call_api(
|
||||||
|
next_url_path, display_id,
|
||||||
|
note='Downloading %s JSON page %d' % (assets_key, page_num),
|
||||||
|
fatal=False)
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
|
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1',
|
'id': 'backstage/1',
|
||||||
'title': 'Sesong 1',
|
'title': 'Sesong 1',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 30,
|
'playlist_mincount': 30,
|
||||||
}
|
}, {
|
||||||
|
# no /sesong/ in path
|
||||||
|
'url': 'https://tv.nrk.no/serie/lindmo/2016',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lindmo/2016',
|
||||||
|
'title': '2016',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 29,
|
||||||
|
}, {
|
||||||
|
# weird nested _embedded in catalog JSON response
|
||||||
|
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dickie-dick-dickens/1',
|
||||||
|
'title': 'Sesong 1',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 11,
|
||||||
|
}, {
|
||||||
|
# 841 entries, multi page
|
||||||
|
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dagsnytt/201509',
|
||||||
|
'title': 'September 2015',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 841,
|
||||||
|
}, {
|
||||||
|
# 180 entries, single page
|
||||||
|
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
|
@ -492,25 +609,35 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||||
else super(NRKTVSeasonIE, cls).suitable(url))
|
else super(NRKTVSeasonIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
domain, serie, season_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
display_id = '%s/%s' % (serie, season_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
data = self._call_api(
|
||||||
|
'%s/catalog/series/%s/seasons/%s' % (domain, serie, season_id),
|
||||||
|
display_id, 'season', query={'pageSize': 50})
|
||||||
|
|
||||||
series = self._extract_series(webpage, display_id)
|
title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
|
||||||
|
|
||||||
season = next(
|
|
||||||
s for s in series['seasons']
|
|
||||||
if int(display_id) == s.get('seasonNumber'))
|
|
||||||
|
|
||||||
title = try_get(season, lambda x: x['titles']['title'], compat_str)
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._extract_episodes(season), display_id, title)
|
self._entries(data, display_id),
|
||||||
|
display_id, title)
|
||||||
|
|
||||||
|
|
||||||
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/serie/(?P<id>[^/]+)'
|
||||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# new layout, instalments
|
||||||
|
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'groenn-glede',
|
||||||
|
'title': 'Grønn glede',
|
||||||
|
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 90,
|
||||||
|
}, {
|
||||||
|
# new layout, instalments, more entries
|
||||||
|
'url': 'https://tv.nrk.no/serie/lindmo',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
'url': 'https://tv.nrk.no/serie/blank',
|
'url': 'https://tv.nrk.no/serie/blank',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'blank',
|
'id': 'blank',
|
||||||
|
@ -524,25 +651,16 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'backstage',
|
'id': 'backstage',
|
||||||
'title': 'Backstage',
|
'title': 'Backstage',
|
||||||
'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
|
'description': 'md5:63692ceb96813d9a207e9910483d948b',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 60,
|
'playlist_mincount': 60,
|
||||||
}, {
|
|
||||||
# new layout, instalments
|
|
||||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'groenn-glede',
|
|
||||||
'title': 'Grønn glede',
|
|
||||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 10,
|
|
||||||
}, {
|
}, {
|
||||||
# old layout
|
# old layout
|
||||||
'url': 'https://tv.nrksuper.no/serie/labyrint',
|
'url': 'https://tv.nrksuper.no/serie/labyrint',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'labyrint',
|
'id': 'labyrint',
|
||||||
'title': 'Labyrint',
|
'title': 'Labyrint',
|
||||||
'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
|
'description': 'I Daidalos sin undersjøiske Labyrint venter spennende oppgaver, skumle robotskapninger og slim.',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
}, {
|
}, {
|
||||||
|
@ -554,6 +672,17 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dickie-dick-dickens',
|
||||||
|
'title': 'Dickie Dick Dickens',
|
||||||
|
'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 8,
|
||||||
|
}, {
|
||||||
|
'url': 'https://nrksuper.no/serie/labyrint',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -564,43 +693,42 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||||
else super(NRKTVSeriesIE, cls).suitable(url))
|
else super(NRKTVSeriesIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
series_id = self._match_id(url)
|
site, series_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
is_radio = site == 'radio.nrk'
|
||||||
|
domain = 'radio' if is_radio else 'tv'
|
||||||
|
|
||||||
webpage = self._download_webpage(url, series_id)
|
size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
|
||||||
|
series = self._call_api(
|
||||||
|
'%s/catalog/series/%s' % (domain, series_id),
|
||||||
|
series_id, 'serie', query={size_prefix + 'ageSize': 50})
|
||||||
|
titles = try_get(series, [
|
||||||
|
lambda x: x['titles'],
|
||||||
|
lambda x: x[x['type']]['titles'],
|
||||||
|
lambda x: x[x['seriesType']]['titles'],
|
||||||
|
]) or {}
|
||||||
|
|
||||||
# New layout (e.g. https://tv.nrk.no/serie/backstage)
|
entries = []
|
||||||
series = self._extract_series(webpage, series_id, fatal=False)
|
entries.extend(self._entries(series, series_id))
|
||||||
if series:
|
embedded = series.get('_embedded') or {}
|
||||||
title = try_get(series, lambda x: x['titles']['title'], compat_str)
|
linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or []
|
||||||
description = try_get(
|
embedded_seasons = embedded.get('seasons') or []
|
||||||
series, lambda x: x['titles']['subtitle'], compat_str)
|
if len(linked_seasons) > len(embedded_seasons):
|
||||||
entries = []
|
for season in linked_seasons:
|
||||||
entries.extend(self._extract_seasons(series.get('seasons')))
|
season_name = season.get('name')
|
||||||
entries.extend(self._extract_entries(series.get('instalments')))
|
if season_name and isinstance(season_name, compat_str):
|
||||||
entries.extend(self._extract_episodes(series.get('extraMaterial')))
|
entries.append(self.url_result(
|
||||||
return self.playlist_result(entries, series_id, title, description)
|
'https://%s.nrk.no/serie/%s/sesong/%s'
|
||||||
|
% (domain, series_id, season_name),
|
||||||
|
ie=NRKTVSeasonIE.ie_key(),
|
||||||
|
video_title=season.get('title')))
|
||||||
|
else:
|
||||||
|
for season in embedded_seasons:
|
||||||
|
entries.extend(self._entries(season, series_id))
|
||||||
|
entries.extend(self._entries(
|
||||||
|
embedded.get('extraMaterial') or {}, series_id))
|
||||||
|
|
||||||
# Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
|
return self.playlist_result(
|
||||||
entries = [
|
entries, series_id, titles.get('title'), titles.get('subtitle'))
|
||||||
self.url_result(
|
|
||||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
|
||||||
series=series_id, season=season_id))
|
|
||||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
|
||||||
]
|
|
||||||
|
|
||||||
title = self._html_search_meta(
|
|
||||||
'seriestitle', webpage,
|
|
||||||
'title', default=None) or self._og_search_title(
|
|
||||||
webpage, fatal=False)
|
|
||||||
if title:
|
|
||||||
title = self._search_regex(
|
|
||||||
r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
|
|
||||||
|
|
||||||
description = self._html_search_meta(
|
|
||||||
'series_description', webpage,
|
|
||||||
'description', default=None) or self._og_search_description(webpage)
|
|
||||||
|
|
||||||
return self.playlist_result(entries, series_id, title, description)
|
|
||||||
|
|
||||||
|
|
||||||
class NRKTVDirekteIE(NRKTVIE):
|
class NRKTVDirekteIE(NRKTVIE):
|
||||||
|
@ -704,14 +832,8 @@ class NRKSkoleIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
nrk_id = self._download_json(
|
||||||
'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id,
|
'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id,
|
||||||
video_id)
|
video_id)['psId']
|
||||||
|
|
||||||
nrk_id = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
|
|
||||||
webpage, 'application json'),
|
|
||||||
video_id)['activeMedia']['psId']
|
|
||||||
|
|
||||||
return self.url_result('nrk:%s' % nrk_id)
|
return self.url_result('nrk:%s' % nrk_id)
|
||||||
|
|
|
@ -221,3 +221,41 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||||
webpage, 'podcast data')
|
webpage, 'podcast data')
|
||||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||||
|
|
||||||
|
|
||||||
|
class NYTimesCookingIE(NYTimesBaseIE):
|
||||||
|
_VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
|
||||||
|
'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100000004756089',
|
||||||
|
'ext': 'mov',
|
||||||
|
'timestamp': 1479383008,
|
||||||
|
'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
|
||||||
|
'title': 'Cranberry Tart',
|
||||||
|
'upload_date': '20161117',
|
||||||
|
'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||||
|
'md5': '4b2e8c70530a89b8d905a2b572316eb8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100000003951728',
|
||||||
|
'ext': 'mov',
|
||||||
|
'timestamp': 1445509539,
|
||||||
|
'description': 'Turkey guide',
|
||||||
|
'upload_date': '20151022',
|
||||||
|
'title': 'Turkey',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
page_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||||
|
|
||||||
|
return self._extract_video_from_id(video_id)
|
||||||
|
|
|
@ -162,13 +162,12 @@ class ORFTVthekIE(InfoExtractor):
|
||||||
class ORFRadioIE(InfoExtractor):
|
class ORFRadioIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
station = mobj.group('station')
|
|
||||||
show_date = mobj.group('date')
|
show_date = mobj.group('date')
|
||||||
show_id = mobj.group('show')
|
show_id = mobj.group('show')
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
|
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
|
||||||
% (station, show_id, show_date), show_id)
|
% (self._API_STATION, show_id, show_date), show_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for info in data['streams']:
|
for info in data['streams']:
|
||||||
|
@ -183,7 +182,7 @@ class ORFRadioIE(InfoExtractor):
|
||||||
duration = end - start if end and start else None
|
duration = end - start if end and start else None
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': loop_stream_id.replace('.mp3', ''),
|
'id': loop_stream_id.replace('.mp3', ''),
|
||||||
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id),
|
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': clean_html(data.get('subtitle')),
|
'description': clean_html(data.get('subtitle')),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
@ -205,6 +204,8 @@ class ORFFM4IE(ORFRadioIE):
|
||||||
IE_NAME = 'orf:fm4'
|
IE_NAME = 'orf:fm4'
|
||||||
IE_DESC = 'radio FM4'
|
IE_DESC = 'radio FM4'
|
||||||
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
|
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
|
||||||
|
_API_STATION = 'fm4'
|
||||||
|
_LOOP_STATION = 'fm4'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://fm4.orf.at/player/20170107/4CC',
|
'url': 'http://fm4.orf.at/player/20170107/4CC',
|
||||||
|
@ -223,10 +224,142 @@ class ORFFM4IE(ORFRadioIE):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFNOEIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:noe'
|
||||||
|
IE_DESC = 'Radio Niederösterreich'
|
||||||
|
_VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'noe'
|
||||||
|
_LOOP_STATION = 'oe2n'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://noe.orf.at/player/20200423/NGM',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFWIEIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:wien'
|
||||||
|
IE_DESC = 'Radio Wien'
|
||||||
|
_VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'wie'
|
||||||
|
_LOOP_STATION = 'oe2w'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://wien.orf.at/player/20200423/WGUM',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFBGLIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:burgenland'
|
||||||
|
IE_DESC = 'Radio Burgenland'
|
||||||
|
_VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'bgl'
|
||||||
|
_LOOP_STATION = 'oe2b'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://burgenland.orf.at/player/20200423/BGM',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFOOEIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:oberoesterreich'
|
||||||
|
IE_DESC = 'Radio Oberösterreich'
|
||||||
|
_VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'ooe'
|
||||||
|
_LOOP_STATION = 'oe2o'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://ooe.orf.at/player/20200423/OGMO',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFSTMIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:steiermark'
|
||||||
|
IE_DESC = 'Radio Steiermark'
|
||||||
|
_VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'stm'
|
||||||
|
_LOOP_STATION = 'oe2st'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://steiermark.orf.at/player/20200423/STGMS',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFKTNIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:kaernten'
|
||||||
|
IE_DESC = 'Radio Kärnten'
|
||||||
|
_VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'ktn'
|
||||||
|
_LOOP_STATION = 'oe2k'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFSBGIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:salzburg'
|
||||||
|
IE_DESC = 'Radio Salzburg'
|
||||||
|
_VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'sbg'
|
||||||
|
_LOOP_STATION = 'oe2s'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://salzburg.orf.at/player/20200423/SGUM',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFTIRIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:tirol'
|
||||||
|
IE_DESC = 'Radio Tirol'
|
||||||
|
_VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'tir'
|
||||||
|
_LOOP_STATION = 'oe2t'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://tirol.orf.at/player/20200423/TGUMO',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFVBGIE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:vorarlberg'
|
||||||
|
IE_DESC = 'Radio Vorarlberg'
|
||||||
|
_VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'vbg'
|
||||||
|
_LOOP_STATION = 'oe2v'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFOE3IE(ORFRadioIE):
|
||||||
|
IE_NAME = 'orf:oe3'
|
||||||
|
IE_DESC = 'Radio Österreich 3'
|
||||||
|
_VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'oe3'
|
||||||
|
_LOOP_STATION = 'oe3'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://oe3.orf.at/player/20200424/3WEK',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class ORFOE1IE(ORFRadioIE):
|
class ORFOE1IE(ORFRadioIE):
|
||||||
IE_NAME = 'orf:oe1'
|
IE_NAME = 'orf:oe1'
|
||||||
IE_DESC = 'Radio Österreich 1'
|
IE_DESC = 'Radio Österreich 1'
|
||||||
_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||||
|
_API_STATION = 'oe1'
|
||||||
|
_LOOP_STATION = 'oe1'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://oe1.orf.at/player/20170108/456544',
|
'url': 'http://oe1.orf.at/player/20170108/456544',
|
||||||
|
|
|
@ -477,7 +477,7 @@ class PBSIE(InfoExtractor):
|
||||||
if media_id:
|
if media_id:
|
||||||
return media_id, presumptive_id, upload_date, description
|
return media_id, presumptive_id, upload_date, description
|
||||||
|
|
||||||
# Fronline video embedded via flp
|
# Frontline video embedded via flp
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
||||||
if video_id:
|
if video_id:
|
||||||
|
|
|
@ -541,6 +541,10 @@ class PeerTubeIE(InfoExtractor):
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'filesize': file_size,
|
'filesize': file_size,
|
||||||
})
|
})
|
||||||
|
if format_id == '0p':
|
||||||
|
f['vcodec'] = 'none'
|
||||||
|
else:
|
||||||
|
f['fps'] = int_or_none(file_.get('fps'))
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ class PeriscopeBaseIE(InfoExtractor):
|
||||||
item_id, query=query)
|
item_id, query=query)
|
||||||
|
|
||||||
def _parse_broadcast_data(self, broadcast, video_id):
|
def _parse_broadcast_data(self, broadcast, video_id):
|
||||||
title = broadcast['status']
|
title = broadcast.get('status') or 'Periscope Broadcast'
|
||||||
uploader = broadcast.get('user_display_name') or broadcast.get('username')
|
uploader = broadcast.get('user_display_name') or broadcast.get('username')
|
||||||
title = '%s - %s' % (uploader, title) if uploader else title
|
title = '%s - %s' % (uploader, title) if uploader else title
|
||||||
is_live = broadcast.get('state').lower() == 'running'
|
is_live = broadcast.get('state').lower() == 'running'
|
||||||
|
|
201
youtube_dl/extractor/pinterest.py
Normal file
201
youtube_dl/extractor/pinterest.py
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestBaseIE(InfoExtractor):
|
||||||
|
_VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'
|
||||||
|
|
||||||
|
def _call_api(self, resource, video_id, options):
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.pinterest.com/resource/%sResource/get/' % resource,
|
||||||
|
video_id, 'Download %s JSON metadata' % resource, query={
|
||||||
|
'data': json.dumps({'options': options})
|
||||||
|
})['resource_response']
|
||||||
|
|
||||||
|
def _extract_video(self, data, extract_formats=True):
|
||||||
|
video_id = data['id']
|
||||||
|
|
||||||
|
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
duration = None
|
||||||
|
if extract_formats:
|
||||||
|
for format_id, format_dict in data['videos']['video_list'].items():
|
||||||
|
if not isinstance(format_dict, dict):
|
||||||
|
continue
|
||||||
|
format_url = url_or_none(format_dict.get('url'))
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if 'hls' in format_id.lower() or ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'width': int_or_none(format_dict.get('width')),
|
||||||
|
'height': int_or_none(format_dict.get('height')),
|
||||||
|
'duration': duration,
|
||||||
|
})
|
||||||
|
self._sort_formats(
|
||||||
|
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||||
|
|
||||||
|
description = data.get('description') or data.get('description_html') or data.get('seo_description')
|
||||||
|
timestamp = unified_timestamp(data.get('created_at'))
|
||||||
|
|
||||||
|
def _u(field):
|
||||||
|
return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)
|
||||||
|
|
||||||
|
uploader = _u('full_name')
|
||||||
|
uploader_id = _u('id')
|
||||||
|
|
||||||
|
repost_count = int_or_none(data.get('repin_count'))
|
||||||
|
comment_count = int_or_none(data.get('comment_count'))
|
||||||
|
categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
|
||||||
|
tags = data.get('hashtags')
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
images = data.get('images')
|
||||||
|
if isinstance(images, dict):
|
||||||
|
for thumbnail_id, thumbnail in images.items():
|
||||||
|
if not isinstance(thumbnail, dict):
|
||||||
|
continue
|
||||||
|
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int_or_none(thumbnail.get('width')),
|
||||||
|
'height': int_or_none(thumbnail.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'repost_count': repost_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'categories': categories,
|
||||||
|
'tags': tags,
|
||||||
|
'formats': formats,
|
||||||
|
'extractor_key': PinterestIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestIE(PinterestBaseIE):
|
||||||
|
_VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.pinterest.com/pin/664281013778109217/',
|
||||||
|
'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '664281013778109217',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Origami',
|
||||||
|
'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
|
||||||
|
'duration': 57.7,
|
||||||
|
'timestamp': 1593073622,
|
||||||
|
'upload_date': '20200625',
|
||||||
|
'uploader': 'Love origami -I am Dafei',
|
||||||
|
'uploader_id': '586523688879454212',
|
||||||
|
'repost_count': 50,
|
||||||
|
'comment_count': 0,
|
||||||
|
'categories': list,
|
||||||
|
'tags': list,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://co.pinterest.com/pin/824721750502199491/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
data = self._call_api(
|
||||||
|
'Pin', video_id, {
|
||||||
|
'field_set_key': 'unauth_react_main_pin',
|
||||||
|
'id': video_id,
|
||||||
|
})['data']
|
||||||
|
return self._extract_video(data)
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestCollectionIE(PinterestBaseIE):
|
||||||
|
_VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '585890301462791043',
|
||||||
|
'title': 'cool diys',
|
||||||
|
},
|
||||||
|
'playlist_count': 8,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pinterest.ca/fudohub/videos/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '682858430939307450',
|
||||||
|
'title': 'VIDEOS',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 365,
|
||||||
|
'skip': 'Test with extract_formats=False',
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if PinterestIE.suitable(url) else super(
|
||||||
|
PinterestCollectionIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
username, slug = re.match(self._VALID_URL, url).groups()
|
||||||
|
board = self._call_api(
|
||||||
|
'Board', slug, {
|
||||||
|
'slug': slug,
|
||||||
|
'username': username
|
||||||
|
})['data']
|
||||||
|
board_id = board['id']
|
||||||
|
options = {
|
||||||
|
'board_id': board_id,
|
||||||
|
'page_size': 250,
|
||||||
|
}
|
||||||
|
bookmark = None
|
||||||
|
entries = []
|
||||||
|
while True:
|
||||||
|
if bookmark:
|
||||||
|
options['bookmarks'] = [bookmark]
|
||||||
|
board_feed = self._call_api('BoardFeed', board_id, options)
|
||||||
|
for item in (board_feed.get('data') or []):
|
||||||
|
if not isinstance(item, dict) or item.get('type') != 'pin':
|
||||||
|
continue
|
||||||
|
video_id = item.get('id')
|
||||||
|
if video_id:
|
||||||
|
# Some pins may not be available anonymously via pin URL
|
||||||
|
# video = self._extract_video(item, extract_formats=False)
|
||||||
|
# video.update({
|
||||||
|
# '_type': 'url_transparent',
|
||||||
|
# 'url': 'https://www.pinterest.com/pin/%s/' % video_id,
|
||||||
|
# })
|
||||||
|
# entries.append(video)
|
||||||
|
entries.append(self._extract_video(item))
|
||||||
|
bookmark = board_feed.get('bookmark')
|
||||||
|
if not bookmark:
|
||||||
|
break
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id=board_id, playlist_title=board.get('name'))
|
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
|
@ -30,7 +31,12 @@ class PornHubBaseIE(InfoExtractor):
|
||||||
def dl(*args, **kwargs):
|
def dl(*args, **kwargs):
|
||||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||||
|
|
||||||
webpage, urlh = dl(*args, **kwargs)
|
ret = dl(*args, **kwargs)
|
||||||
|
|
||||||
|
if not ret:
|
||||||
|
return ret
|
||||||
|
|
||||||
|
webpage, urlh = ret
|
||||||
|
|
||||||
if any(re.search(p, webpage) for p in (
|
if any(re.search(p, webpage) for p in (
|
||||||
r'<body\b[^>]+\bonload=["\']go\(\)',
|
r'<body\b[^>]+\bonload=["\']go\(\)',
|
||||||
|
@ -52,20 +58,21 @@ class PornHubIE(PornHubBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
(?:www\.)?thumbzilla\.com/video/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
'md5': '1e19b41231a02eba417839222ac9d58e',
|
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '648719015',
|
'id': '648719015',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
|
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
|
||||||
'uploader': 'Babes',
|
'uploader': 'Babes',
|
||||||
'upload_date': '20130628',
|
'upload_date': '20130628',
|
||||||
|
'timestamp': 1372447216,
|
||||||
'duration': 361,
|
'duration': 361,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
@ -82,8 +89,8 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'id': '1331683002',
|
'id': '1331683002',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '重庆婷婷女王足交',
|
'title': '重庆婷婷女王足交',
|
||||||
'uploader': 'Unknown',
|
|
||||||
'upload_date': '20150213',
|
'upload_date': '20150213',
|
||||||
|
'timestamp': 1423804862,
|
||||||
'duration': 1753,
|
'duration': 1753,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
@ -121,6 +128,7 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'This video has been disabled',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -149,6 +157,9 @@ class PornHubIE(PornHubBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -157,7 +168,7 @@ class PornHubIE(PornHubBaseIE):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
|
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _extract_count(self, pattern, webpage, name):
|
def _extract_count(self, pattern, webpage, name):
|
||||||
|
@ -338,14 +349,14 @@ class PornHubIE(PornHubBaseIE):
|
||||||
|
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', default=None)
|
||||||
|
|
||||||
view_count = self._extract_count(
|
view_count = self._extract_count(
|
||||||
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||||
like_count = self._extract_count(
|
like_count = self._extract_count(
|
||||||
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
|
||||||
dislike_count = self._extract_count(
|
dislike_count = self._extract_count(
|
||||||
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
|
||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
|
@ -356,7 +367,11 @@ class PornHubIE(PornHubBaseIE):
|
||||||
if div:
|
if div:
|
||||||
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
||||||
|
|
||||||
return {
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
# description provided in JSON-LD is irrelevant
|
||||||
|
info['description'] = None
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
|
@ -372,7 +387,7 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'tags': extract_list('tags'),
|
'tags': extract_list('tags'),
|
||||||
'categories': extract_list('categories'),
|
'categories': extract_list('categories'),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
|
@ -415,7 +430,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||||
'playlist_mincount': 118,
|
'playlist_mincount': 118,
|
||||||
|
@ -483,7 +498,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -598,7 +613,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -175,7 +176,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
(?:
|
(?:
|
||||||
(?:beta\.)?
|
(?:beta\.)?
|
||||||
(?:
|
(?:
|
||||||
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
|
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
|
||||||
)\.(?:de|at|ch)|
|
)\.(?:de|at|ch)|
|
||||||
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
|
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
|
||||||
)
|
)
|
||||||
|
@ -193,10 +194,14 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2104602',
|
'id': '2104602',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Episode 18 - Staffel 2',
|
'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
|
||||||
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
|
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
|
||||||
'upload_date': '20131231',
|
'upload_date': '20131231',
|
||||||
'duration': 5845.04,
|
'duration': 5845.04,
|
||||||
|
'series': 'CIRCUS HALLIGALLI',
|
||||||
|
'season_number': 2,
|
||||||
|
'episode': 'Episode 18 - Staffel 2',
|
||||||
|
'episode_number': 18,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -300,8 +305,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2572814',
|
'id': '2572814',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Andreas Kümmert: Rocket Man',
|
'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
|
||||||
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
|
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
|
||||||
|
'timestamp': 1382041620,
|
||||||
'upload_date': '20131017',
|
'upload_date': '20131017',
|
||||||
'duration': 469.88,
|
'duration': 469.88,
|
||||||
},
|
},
|
||||||
|
@ -310,7 +316,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
|
'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2156342',
|
'id': '2156342',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -332,19 +338,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
'skip': 'This video is unavailable',
|
'skip': 'This video is unavailable',
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4187506',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Best of Circus HalliGalli',
|
|
||||||
'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
|
|
||||||
'upload_date': '20151229',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
# title in <h2 class="subtitle">
|
# title in <h2 class="subtitle">
|
||||||
'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
|
'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
|
||||||
|
@ -421,7 +414,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
|
r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
|
||||||
]
|
]
|
||||||
_UPLOAD_DATE_REGEXES = [
|
_UPLOAD_DATE_REGEXES = [
|
||||||
r'<meta property="og:published_time" content="(.+?)">',
|
|
||||||
r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
|
r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
|
||||||
r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
|
r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
|
||||||
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
|
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
|
||||||
|
@ -451,17 +443,21 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
if description is None:
|
if description is None:
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(
|
||||||
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
self._html_search_meta('og:published_time', webpage,
|
||||||
|
'upload date', default=None)
|
||||||
|
or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
|
||||||
|
webpage, 'upload date', default=None))
|
||||||
|
|
||||||
info.update({
|
json_ld = self._search_json_ld(webpage, clip_id, default={})
|
||||||
|
|
||||||
|
return merge_dicts(info, {
|
||||||
'id': clip_id,
|
'id': clip_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
})
|
}, json_ld)
|
||||||
return info
|
|
||||||
|
|
||||||
def _extract_playlist(self, url, webpage):
|
def _extract_playlist(self, url, webpage):
|
||||||
playlist_id = self._html_search_regex(
|
playlist_id = self._html_search_regex(
|
||||||
|
|
|
@ -82,17 +82,6 @@ class PuhuTVIE(InfoExtractor):
|
||||||
urls = []
|
urls = []
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
def add_http_from_hls(m3u8_f):
|
|
||||||
http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4')
|
|
||||||
if http_url != m3u8_f['url']:
|
|
||||||
f = m3u8_f.copy()
|
|
||||||
f.update({
|
|
||||||
'format_id': f['format_id'].replace('hls', 'http'),
|
|
||||||
'protocol': 'http',
|
|
||||||
'url': http_url,
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
for video in videos['data']['videos']:
|
for video in videos['data']['videos']:
|
||||||
media_url = url_or_none(video.get('url'))
|
media_url = url_or_none(video.get('url'))
|
||||||
if not media_url or media_url in urls:
|
if not media_url or media_url in urls:
|
||||||
|
@ -101,12 +90,9 @@ class PuhuTVIE(InfoExtractor):
|
||||||
|
|
||||||
playlist = video.get('is_playlist')
|
playlist = video.get('is_playlist')
|
||||||
if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
|
if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False))
|
||||||
for m3u8_f in m3u8_formats:
|
|
||||||
formats.append(m3u8_f)
|
|
||||||
add_http_from_hls(m3u8_f)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
quality = int_or_none(video.get('quality'))
|
quality = int_or_none(video.get('quality'))
|
||||||
|
@ -128,8 +114,6 @@ class PuhuTVIE(InfoExtractor):
|
||||||
format_id += '-%sp' % quality
|
format_id += '-%sp' % quality
|
||||||
f['format_id'] = format_id
|
f['format_id'] = format_id
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
if is_hls:
|
|
||||||
add_http_from_hls(f)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
creator = try_get(
|
creator = try_get(
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
@ -15,9 +16,9 @@ from ..utils import (
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
remove_start,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
@ -67,7 +68,7 @@ class RaiBaseIE(InfoExtractor):
|
||||||
|
|
||||||
# This does not imply geo restriction (e.g.
|
# This does not imply geo restriction (e.g.
|
||||||
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
||||||
if media_url == 'http://download.rai.it/video_no_available.mp4':
|
if '/video_no_available.mp4' in media_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ext = determine_ext(media_url)
|
ext = determine_ext(media_url)
|
||||||
|
@ -122,40 +123,20 @@ class RaiBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayIE(RaiBaseIE):
|
class RaiPlayIE(RaiBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
|
|
||||||
'md5': '340aa3b7afb54bfd14a8c11786450d76',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'La Casa Bianca',
|
|
||||||
'alt_title': 'S2016 - Puntata del 23/10/2016',
|
|
||||||
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'uploader': 'Rai 3',
|
|
||||||
'creator': 'Rai 3',
|
|
||||||
'duration': 3278,
|
|
||||||
'timestamp': 1477764300,
|
|
||||||
'upload_date': '20161029',
|
|
||||||
'series': 'La Casa Bianca',
|
|
||||||
'season': '2016',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||||
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Report del 07/04/2014',
|
'title': 'Report del 07/04/2014',
|
||||||
'alt_title': 'S2013/14 - Puntata del 07/04/2014',
|
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
|
||||||
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
|
'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Rai 5',
|
'uploader': 'Rai Gulp',
|
||||||
'creator': 'Rai 5',
|
|
||||||
'duration': 6160,
|
'duration': 6160,
|
||||||
'series': 'Report',
|
'series': 'Report',
|
||||||
'season_number': 5,
|
|
||||||
'season': '2013/14',
|
'season': '2013/14',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -167,48 +148,52 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
base, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
url, video_id = mobj.group('url', 'id')
|
|
||||||
|
|
||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
'%s?json' % url, video_id, 'Downloading video JSON')
|
base + '.json', video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
title = media['name']
|
title = media['name']
|
||||||
|
|
||||||
video = media['video']
|
video = media['video']
|
||||||
|
|
||||||
relinker_info = self._extract_relinker_info(video['contentUrl'], video_id)
|
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
|
||||||
self._sort_formats(relinker_info['formats'])
|
self._sort_formats(relinker_info['formats'])
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
if 'images' in media:
|
for _, value in media.get('images', {}).items():
|
||||||
for _, value in media.get('images').items():
|
if value:
|
||||||
if value:
|
thumbnails.append({
|
||||||
thumbnails.append({
|
'url': urljoin(url, value),
|
||||||
'url': value.replace('[RESOLUTION]', '600x400')
|
})
|
||||||
})
|
|
||||||
|
|
||||||
timestamp = unified_timestamp(try_get(
|
date_published = media.get('date_published')
|
||||||
media, lambda x: x['availabilities'][0]['start'], compat_str))
|
time_published = media.get('time_published')
|
||||||
|
if date_published and time_published:
|
||||||
|
date_published += ' ' + time_published
|
||||||
|
|
||||||
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
||||||
|
|
||||||
|
program_info = media.get('program_info') or {}
|
||||||
|
season = media.get('season')
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
|
||||||
|
'display_id': video_id,
|
||||||
'title': self._live_title(title) if relinker_info.get(
|
'title': self._live_title(title) if relinker_info.get(
|
||||||
'is_live') else title,
|
'is_live') else title,
|
||||||
'alt_title': media.get('subtitle'),
|
'alt_title': strip_or_none(media.get('subtitle')),
|
||||||
'description': media.get('description'),
|
'description': media.get('description'),
|
||||||
'uploader': strip_or_none(media.get('channel')),
|
'uploader': strip_or_none(media.get('channel')),
|
||||||
'creator': strip_or_none(media.get('editor')),
|
'creator': strip_or_none(media.get('editor') or None),
|
||||||
'duration': parse_duration(video.get('duration')),
|
'duration': parse_duration(video.get('duration')),
|
||||||
'timestamp': timestamp,
|
'timestamp': unified_timestamp(date_published),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'series': try_get(
|
'series': program_info.get('name'),
|
||||||
media, lambda x: x['isPartOf']['name'], compat_str),
|
'season_number': int_or_none(season),
|
||||||
'season_number': int_or_none(try_get(
|
'season': season if (season and not season.isdigit()) else None,
|
||||||
media, lambda x: x['isPartOf']['numeroStagioni'])),
|
'episode': media.get('episode_title'),
|
||||||
'season': media.get('stagione') or None,
|
'episode_number': int_or_none(media.get('episode')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,16 +201,16 @@ class RaiPlayIE(RaiBaseIE):
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayLiveIE(RaiBaseIE):
|
class RaiPlayLiveIE(RaiPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/dirette/rainews24',
|
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||||
'display_id': 'rainews24',
|
'display_id': 'rainews24',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': 'md5:6eca31500550f9376819f174e5644754',
|
'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
|
||||||
'uploader': 'Rai News 24',
|
'uploader': 'Rai News 24',
|
||||||
'creator': 'Rai News 24',
|
'creator': 'Rai News 24',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
|
@ -233,58 +218,50 @@ class RaiPlayLiveIE(RaiBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
|
||||||
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
|
|
||||||
webpage, 'content id')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': RaiPlayIE.ie_key(),
|
|
||||||
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayPlaylistIE(InfoExtractor):
|
class RaiPlayPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nondirloalmiocapo',
|
'id': 'nondirloalmiocapo',
|
||||||
'title': 'Non dirlo al mio capo',
|
'title': 'Non dirlo al mio capo',
|
||||||
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
|
'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 12,
|
'playlist_mincount': 12,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
base, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
program = self._download_json(
|
||||||
|
base + '.json', playlist_id, 'Downloading program JSON')
|
||||||
title = self._html_search_meta(
|
|
||||||
('programma', 'nomeProgramma'), webpage, 'title')
|
|
||||||
description = unescapeHTML(self._html_search_meta(
|
|
||||||
('description', 'og:description'), webpage, 'description'))
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for mobj in re.finditer(
|
for b in (program.get('blocks') or []):
|
||||||
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
|
for s in (b.get('sets') or []):
|
||||||
webpage):
|
s_id = s.get('id')
|
||||||
video_url = urljoin(url, mobj.group('path'))
|
if not s_id:
|
||||||
entries.append(self.url_result(
|
continue
|
||||||
video_url, ie=RaiPlayIE.ie_key(),
|
medias = self._download_json(
|
||||||
video_id=RaiPlayIE._match_id(video_url)))
|
'%s/%s.json' % (base, s_id), s_id,
|
||||||
|
'Downloading content set JSON', fatal=False)
|
||||||
|
if not medias:
|
||||||
|
continue
|
||||||
|
for m in (medias.get('items') or []):
|
||||||
|
path_id = m.get('path_id')
|
||||||
|
if not path_id:
|
||||||
|
continue
|
||||||
|
video_url = urljoin(url, path_id)
|
||||||
|
entries.append(self.url_result(
|
||||||
|
video_url, ie=RaiPlayIE.ie_key(),
|
||||||
|
video_id=RaiPlayIE._match_id(video_url)))
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, program.get('name'),
|
||||||
|
try_get(program, lambda x: x['program_info']['description']))
|
||||||
|
|
||||||
|
|
||||||
class RaiIE(RaiBaseIE):
|
class RaiIE(RaiBaseIE):
|
||||||
|
@ -300,7 +277,8 @@ class RaiIE(RaiBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 1758,
|
'duration': 1758,
|
||||||
'upload_date': '20140612',
|
'upload_date': '20140612',
|
||||||
}
|
},
|
||||||
|
'skip': 'This content is available only in Italy',
|
||||||
}, {
|
}, {
|
||||||
# with ContentItem in many metas
|
# with ContentItem in many metas
|
||||||
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
||||||
|
@ -316,7 +294,7 @@ class RaiIE(RaiBaseIE):
|
||||||
}, {
|
}, {
|
||||||
# with ContentItem in og:url
|
# with ContentItem in og:url
|
||||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||||
'md5': '11959b4e44fa74de47011b5799490adf',
|
'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -326,18 +304,6 @@ class RaiIE(RaiBaseIE):
|
||||||
'duration': 2214,
|
'duration': 2214,
|
||||||
'upload_date': '20161103',
|
'upload_date': '20161103',
|
||||||
}
|
}
|
||||||
}, {
|
|
||||||
# drawMediaRaiTV(...)
|
|
||||||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
|
||||||
'md5': '2dd727e61114e1ee9c47f0da6914e178',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Il pacco',
|
|
||||||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'upload_date': '20141221',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# initEdizione('ContentItem-...'
|
# initEdizione('ContentItem-...'
|
||||||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
||||||
|
@ -349,17 +315,6 @@ class RaiIE(RaiBaseIE):
|
||||||
'upload_date': '20170401',
|
'upload_date': '20170401',
|
||||||
},
|
},
|
||||||
'skip': 'Changes daily',
|
'skip': 'Changes daily',
|
||||||
}, {
|
|
||||||
# HDS live stream with only relinker URL
|
|
||||||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'EuroNews',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# HLS live stream with ContentItem in og:url
|
# HLS live stream with ContentItem in og:url
|
||||||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
||||||
|
@ -469,7 +424,7 @@ class RaiIE(RaiBaseIE):
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
relinker_url = self._search_regex(
|
relinker_url = self._proto_relative_url(self._search_regex(
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
var\s+videoURL|
|
var\s+videoURL|
|
||||||
|
@ -481,7 +436,7 @@ class RaiIE(RaiBaseIE):
|
||||||
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
||||||
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
||||||
''',
|
''',
|
||||||
webpage, 'relinker URL', group='url')
|
webpage, 'relinker URL', group='url'))
|
||||||
|
|
||||||
relinker_info = self._extract_relinker_info(
|
relinker_info = self._extract_relinker_info(
|
||||||
urljoin(url, relinker_url), video_id)
|
urljoin(url, relinker_url), video_id)
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -10,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class RedBullTVIE(InfoExtractor):
|
class RedBullTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live)/(?P<id>AP-\w+)'
|
_VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# film
|
# film
|
||||||
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
|
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
|
||||||
|
@ -29,8 +31,8 @@ class RedBullTVIE(InfoExtractor):
|
||||||
'id': 'AP-1PMHKJFCW1W11',
|
'id': 'AP-1PMHKJFCW1W11',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Grime - Hashtags S2E4',
|
'title': 'Grime - Hashtags S2E4',
|
||||||
'description': 'md5:b5f522b89b72e1e23216e5018810bb25',
|
'description': 'md5:5546aa612958c08a98faaad4abce484d',
|
||||||
'duration': 904.6,
|
'duration': 904,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -44,11 +46,15 @@ class RedBullTVIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11',
|
'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def extract_info(self, video_id):
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
session = self._download_json(
|
session = self._download_json(
|
||||||
'https://api.redbull.tv/v3/session', video_id,
|
'https://api.redbull.tv/v3/session', video_id,
|
||||||
note='Downloading access token', query={
|
note='Downloading access token', query={
|
||||||
|
@ -105,24 +111,119 @@ class RedBullTVIE(InfoExtractor):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self.extract_info(video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class RedBullEmbedIE(RedBullTVIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})'
|
||||||
|
_TESTS = [{
|
||||||
|
# HLS manifest accessible only using assetId
|
||||||
|
'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_VIDEO_ESSENSE_TMPL = '''... on %s {
|
||||||
|
videoEssence {
|
||||||
|
attributes
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
rrn_id = self._match_id(url)
|
||||||
|
asset_id = self._download_json(
|
||||||
|
'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
|
||||||
|
rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'},
|
||||||
|
query={
|
||||||
|
'query': '''{
|
||||||
|
resource(id: "%s", enforceGeoBlocking: false) {
|
||||||
|
%s
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'),
|
||||||
|
})['data']['resource']['videoEssence']['attributes']['assetId']
|
||||||
|
return self.extract_info(asset_id)
|
||||||
|
|
||||||
|
|
||||||
class RedBullTVRrnContentIE(InfoExtractor):
|
class RedBullTVRrnContentIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
_VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
|
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
|
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
region, lang, rrn_id = re.search(self._VALID_URL, url).groups()
|
||||||
|
rrn_id += ':%s-%s' % (lang, region.upper())
|
||||||
|
return self.url_result(
|
||||||
|
'https://www.redbull.com/embed/' + rrn_id,
|
||||||
|
RedBullEmbedIE.ie_key(), rrn_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
video_url = self._og_search_url(webpage)
|
class RedBullIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04',
|
||||||
|
'md5': 'db8271a7200d40053a1809ed0dd574ff',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'AA-1MT8DQWA91W14',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Grime - Hashtags S2E4',
|
||||||
|
'description': 'md5:5546aa612958c08a98faaad4abce484d',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# only available on the int-en website so a fallback is needed for the API
|
||||||
|
# https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero
|
||||||
|
'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr']
|
||||||
|
_LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups()
|
||||||
|
if filter_type == 'episodes':
|
||||||
|
filter_type = 'episode-videos'
|
||||||
|
elif filter_type == 'live':
|
||||||
|
filter_type = 'live-videos'
|
||||||
|
|
||||||
|
regions = [region.upper()]
|
||||||
|
if region != 'int':
|
||||||
|
if region in self._LAT_FALLBACK_MAP:
|
||||||
|
regions.append('LAT')
|
||||||
|
if lang in self._INT_FALLBACK_LIST:
|
||||||
|
regions.append('INT')
|
||||||
|
locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions])
|
||||||
|
|
||||||
|
rrn_id = self._download_json(
|
||||||
|
'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale,
|
||||||
|
display_id, query={
|
||||||
|
'filter[type]': filter_type,
|
||||||
|
'filter[uriSlug]': display_id,
|
||||||
|
'rb3Schema': 'v1:hero',
|
||||||
|
})['data']['id']
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
video_url, ie=RedBullTVIE.ie_key(),
|
'https://www.redbull.com/embed/' + rrn_id,
|
||||||
video_id=RedBullTVIE._match_id(video_url))
|
RedBullEmbedIE.ie_key(), rrn_id)
|
||||||
|
|
|
@ -4,6 +4,7 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
|
@ -14,7 +15,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class RedTubeIE(InfoExtractor):
|
class RedTubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.redtube.com/66418',
|
'url': 'http://www.redtube.com/66418',
|
||||||
'md5': 'fc08071233725f26b8f014dba9590005',
|
'md5': 'fc08071233725f26b8f014dba9590005',
|
||||||
|
@ -30,6 +31,9 @@ class RedTubeIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
|
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://it.redtube.com/66418',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -57,7 +61,7 @@ class RedTubeIE(InfoExtractor):
|
||||||
|
|
||||||
if not info.get('title'):
|
if not info.get('title'):
|
||||||
info['title'] = self._html_search_regex(
|
info['title'] = self._html_search_regex(
|
||||||
(r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
|
(r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
|
||||||
r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
|
r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
|
||||||
webpage, 'title', group='title',
|
webpage, 'title', group='title',
|
||||||
default=None) or self._og_search_title(webpage)
|
default=None) or self._og_search_title(webpage)
|
||||||
|
@ -77,7 +81,7 @@ class RedTubeIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
medias = self._parse_json(
|
medias = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
|
r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
|
||||||
'media definitions', default='{}'),
|
'media definitions', default='{}'),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False)
|
||||||
if medias and isinstance(medias, list):
|
if medias and isinstance(medias, list):
|
||||||
|
@ -85,6 +89,12 @@ class RedTubeIE(InfoExtractor):
|
||||||
format_url = url_or_none(media.get('videoUrl'))
|
format_url = url_or_none(media.get('videoUrl'))
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
|
if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
|
fatal=False))
|
||||||
|
continue
|
||||||
format_id = media.get('quality')
|
format_id = media.get('quality')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
|
|
@ -14,12 +14,27 @@ class RtlNlIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?:(?:www|static)\.)?
|
https?://(?:(?:www|static)\.)?
|
||||||
(?:
|
(?:
|
||||||
rtlxl\.nl/[^\#]*\#!/[^/]+/|
|
rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
|
||||||
rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)
|
rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
|
||||||
|
embed\.rtl\.nl/\#uuid=
|
||||||
)
|
)
|
||||||
(?P<id>[0-9a-f-]+)'''
|
(?P<id>[0-9a-f-]+)'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# new URL schema
|
||||||
|
'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
|
||||||
|
'md5': '490428f1187b60d714f34e1f2e3af0b6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'RTL Nieuws',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'timestamp': 1593293400,
|
||||||
|
'upload_date': '20200627',
|
||||||
|
'duration': 661.08,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# old URL schema
|
||||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
|
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
|
||||||
'md5': '473d1946c1fdd050b2c0161a4b13c373',
|
'md5': '473d1946c1fdd050b2c0161a4b13c373',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -31,6 +46,7 @@ class RtlNlIE(InfoExtractor):
|
||||||
'upload_date': '20160429',
|
'upload_date': '20160429',
|
||||||
'duration': 1167.96,
|
'duration': 1167.96,
|
||||||
},
|
},
|
||||||
|
'skip': '404',
|
||||||
}, {
|
}, {
|
||||||
# best format available a3t
|
# best format available a3t
|
||||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||||
|
@ -76,6 +92,10 @@ class RtlNlIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
|
'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# new embed URL schema
|
||||||
|
'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
67
youtube_dl/extractor/rumble.py
Normal file
67
youtube_dl/extractor/rumble.py
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RumbleEmbedIE(InfoExtractor):
    """Extractor for Rumble embed player URLs (rumble.com/embed/<id>)."""

    # An optional "<prefix>." segment may precede the id; only the part
    # after the dot is captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
        }
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the embedJS metadata for ``url`` and return the info dict.

        ``title`` is looked up unconditionally in the response; all other
        metadata fields are read with ``.get`` and may be absent.
        """
        video_id = self._match_id(url)
        video = self._download_json(
            'https://rumble.com/embedJS/', video_id,
            query={'request': 'video', 'v': video_id})
        title = video['title']

        formats = []
        variants = video.get('ua') or {}
        for height, ua in variants.items():
            # Positions 0 and 1 of each entry are tried as URL strings; the
            # entry two positions further on is consulted for a 'bitrate'.
            for idx in range(2):
                media_url = try_get(ua, lambda x: x[idx], compat_str)
                if not media_url:
                    continue
                ext = determine_ext(media_url)
                fmt = {
                    'ext': ext,
                    'format_id': '%s-%sp' % (ext, height),
                    'height': int_or_none(height),
                    'url': media_url,
                }
                bitrate = try_get(ua, lambda x: x[idx + 2]['bitrate'])
                if bitrate:
                    fmt['tbr'] = int_or_none(bitrate)
                formats.append(fmt)
        self._sort_formats(formats)

        author = video.get('author') or {}

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('i'),
            'timestamp': parse_iso8601(video.get('pubDate')),
            'channel': author.get('name'),
            'channel_url': author.get('url'),
            'duration': int_or_none(video.get('duration')),
        }
|
|
@ -1,9 +1,15 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
urlencode_postdata,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ServusIE(InfoExtractor):
|
class ServusIE(InfoExtractor):
|
||||||
|
@ -12,20 +18,29 @@ class ServusIE(InfoExtractor):
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
||||||
servustv\.com/videos
|
(?:servustv|pm-wissen)\.com/videos
|
||||||
)
|
)
|
||||||
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# new URL schema
|
# new URL schema
|
||||||
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
||||||
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
'md5': '60474d4c21f3eb148838f215c37f02b9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'AA-1T6VBU5PW1W12',
|
'id': 'AA-1T6VBU5PW1W12',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Die Grünen aus Sicht des Volkes',
|
'title': 'Die Grünen aus Sicht des Volkes',
|
||||||
|
'alt_title': 'Talk im Hangar-7 Voxpops Gruene',
|
||||||
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 62.442,
|
||||||
|
'timestamp': 1605193976,
|
||||||
|
'upload_date': '20201112',
|
||||||
|
'series': 'Talk im Hangar-7',
|
||||||
|
'season': 'Season 9',
|
||||||
|
'season_number': 9,
|
||||||
|
'episode': 'Episode 31 - September 14',
|
||||||
|
'episode_number': 31,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# old URL schema
|
# old URL schema
|
||||||
|
@ -40,30 +55,94 @@ class ServusIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).upper()
|
video_id = self._match_id(url).upper()
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._search_regex(
|
token = self._download_json(
|
||||||
(r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
'https://auth.redbullmediahouse.com/token', video_id,
|
||||||
r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
|
'Downloading token', data=urlencode_postdata({
|
||||||
webpage, 'title', default=None,
|
'grant_type': 'client_credentials',
|
||||||
group='title') or self._og_search_title(webpage)
|
}), headers={
|
||||||
title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
|
'Authorization': 'Basic SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==',
|
||||||
description = self._og_search_description(webpage)
|
})
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
access_token = token['access_token']
|
||||||
|
token_type = token.get('token_type', 'Bearer')
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
video = self._download_json(
|
||||||
'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
|
'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id,
|
||||||
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
video_id, 'Downloading video JSON', headers={
|
||||||
|
'Authorization': '%s %s' % (token_type, access_token),
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
thumbnail = None
|
||||||
|
for resource in video['resources']:
|
||||||
|
if not isinstance(resource, dict):
|
||||||
|
continue
|
||||||
|
format_url = url_or_none(resource.get('url'))
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
extension = resource.get('extension')
|
||||||
|
type_ = resource.get('type')
|
||||||
|
if extension == 'jpg' or type_ == 'reference_keyframe':
|
||||||
|
thumbnail = format_url
|
||||||
|
continue
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if type_ == 'dash' or ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
elif type_ == 'hls' or ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif extension == 'mp4' or ext == 'mp4':
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': type_,
|
||||||
|
'width': int_or_none(resource.get('width')),
|
||||||
|
'height': int_or_none(resource.get('height')),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
attrs = {}
|
||||||
|
for attribute in video['attributes']:
|
||||||
|
if not isinstance(attribute, dict):
|
||||||
|
continue
|
||||||
|
key = attribute.get('fieldKey')
|
||||||
|
value = attribute.get('fieldValue')
|
||||||
|
if not key or not value:
|
||||||
|
continue
|
||||||
|
attrs[key] = value
|
||||||
|
|
||||||
|
title = attrs.get('title_stv') or video_id
|
||||||
|
alt_title = attrs.get('title')
|
||||||
|
description = attrs.get('long_description') or attrs.get('short_description')
|
||||||
|
series = attrs.get('label')
|
||||||
|
season = attrs.get('season')
|
||||||
|
episode = attrs.get('chapter')
|
||||||
|
duration = float_or_none(attrs.get('duration'), scale=1000)
|
||||||
|
season_number = int_or_none(self._search_regex(
|
||||||
|
r'Season (\d+)', season or '', 'season number', default=None))
|
||||||
|
episode_number = int_or_none(self._search_regex(
|
||||||
|
r'Episode (\d+)', episode or '', 'episode number', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'alt_title': alt_title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': unified_timestamp(video.get('lastPublished')),
|
||||||
|
'series': series,
|
||||||
|
'season': season,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': episode_number,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
239
youtube_dl/extractor/skyit.py
Normal file
239
youtube_dl/extractor/skyit.py
Normal file
|
@ -0,0 +1,239 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItPlayerIE(InfoExtractor):
    """Extractor for player.sky.it player pages.

    Also provides the helpers (``_player_url_result``, ``_parse_video``)
    and the ``_DOMAIN`` setting used by extractors that subclass it.
    """

    IE_NAME = 'player.sky.it'
    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
    _GEO_BYPASS = False
    _DOMAIN = 'sky'
    _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
    # Per-domain tokens sent as the 'token' query parameter of getVideoData.
    # http://static.sky.it/static/skyplayer/conf.json
    _TOKEN_MAP = {
        'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
        'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
        'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
        'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
        'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
        'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
        'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
        'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
        'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
    }

    def _player_url_result(self, video_id):
        """Return a url_result pointing at the player page for ``video_id``.

        The player URL carries this class's ``_DOMAIN`` and is always routed
        to SkyItPlayerIE, whichever subclass makes the call.
        """
        player_url = self._PLAYER_TMPL % (video_id, self._DOMAIN)
        return self.url_result(player_url, SkyItPlayerIE.ie_key(), video_id)

    def _parse_video(self, video, video_id):
        """Build the info dict from a decoded video/livestream JSON object.

        Live entries (``type == 'live'``) read their manifest from
        'streaming_url' and their geo flag from 'geoblock'; all others use
        'hls_url' and 'geob'.
        """
        title = video['title']
        is_live = video.get('type') == 'live'
        if is_live:
            url_prefix, geo_key = 'streaming', 'geoblock'
        else:
            url_prefix, geo_key = 'hls', 'geob'
        hls_url = video.get(url_prefix + '_url')
        # Report geo restriction only when no manifest URL was returned at all.
        if not hls_url and video.get(geo_key):
            self.raise_geo_restricted(countries=['IT'])

        if is_live:
            formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
        else:
            formats = self._extract_akamai_formats(
                hls_url, video_id, {'http': 'videoplatform.sky.it'})
        self._sort_formats(formats)

        if is_live:
            display_title = self._live_title(title)
        else:
            display_title = title

        return {
            'id': video_id,
            'title': display_title,
            'formats': formats,
            'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
            'description': video.get('short_desc') or None,
            'timestamp': unified_timestamp(video.get('create_date')),
            'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
            'is_live': is_live,
        }

    def _real_extract(self, url):
        """Query getVideoData for the id in ``url`` and parse the response."""
        video_id = self._match_id(url)
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        domain = params.get('domain', [None])[0]
        # The 'sky' token is the fallback when the URL has no 'domain'
        # parameter or the domain has no entry in _TOKEN_MAP.
        token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
        video = self._download_json(
            'https://apid.sky.it/vdp/v1/getVideoData',
            video_id, query={
                'caller': 'sky',
                'id': video_id,
                'token': token
            }, headers=self.geo_verification_headers())
        return self._parse_video(video, video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItVideoIE(SkyItPlayerIE):
    """Extractor for video pages on the masterchef/video/xfactor sky.it hosts."""

    IE_NAME = 'video.sky.it'
    _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        }
    }, {
        'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
        'only_matching': True,
    }, {
        'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand off to the player extractor using the numeric id from ``url``."""
        # The id is the trailing number of the URL slug; no page download needed.
        return self._player_url_result(self._match_id(url))
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItVideoLiveIE(SkyItPlayerIE):
    """Extractor for live stream pages under video.sky.it/diretta/."""

    IE_NAME = 'video.sky.it:live'
    _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://video.sky.it/diretta/tg24',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve the page's asset id and parse its getLivestream response."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The asset id is read from the JSON embedded in the page's
        # __NEXT_DATA__ script tag.
        next_data_json = self._search_regex(
            r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
            webpage, 'next data')
        next_data = self._parse_json(next_data_json, display_id)
        content = next_data['props']['initialState']['livePage']['content']
        asset_id = compat_str(content['asset_id'])
        livestream = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream',
            asset_id, query={'id': asset_id})
        return self._parse_video(livestream, asset_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItIE(SkyItPlayerIE):
    """Extractor for dated article pages on sport.sky.it and tg24.sky.it.

    Subclasses reuse ``_real_extract`` and override ``_VIDEO_ID_REGEX``
    (and ``_DOMAIN``) to match their own page markup.
    """

    IE_NAME = 'sky.it'
    _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    # Pattern whose first group captures the numeric video id in the page HTML.
    _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
    _TESTS = [{
        'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
        'info_dict': {
            'id': '631201',
            'ext': 'mp4',
            'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
            'upload_date': '20201121',
            'timestamp': 1605995753,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        },
    }]

    def _real_extract(self, url):
        """Find the video id in the article page and hand off to the player."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        video_id = self._search_regex(
            self._VIDEO_ID_REGEX, page, 'video id')
        return self._player_url_result(video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItAcademyIE(SkyItIE):
    """Extractor for dated article pages on skyacademy.it."""

    IE_NAME = 'skyacademy.it'
    _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    # Site-specific overrides of the values inherited from the parent classes.
    _DOMAIN = 'skyacademy'
    _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
    _TESTS = [{
        'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
        'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
        'info_dict': {
            'id': '523458',
            'ext': 'mp4',
            'title': 'Sky Academy "The Best CineCamp 2019"',
            'timestamp': 1562843784,
            'upload_date': '20190711',
        }
    }]
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItArteIE(SkyItIE):
    """Extractor for video pages on arte.sky.it."""

    IE_NAME = 'arte.sky.it'
    _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
    _DOMAIN = 'skyarte'
    # The video id is taken from the src of an embedded player.sky.it iframe.
    _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
    _TESTS = [{
        'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
        'md5': '515aee97b87d7a018b6c80727d3e7e17',
        'info_dict': {
            'id': '627926',
            'ext': 'mp4',
            'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
            'upload_date': '20201106',
            'timestamp': 1604664493,
        }
    }]
|
||||||
|
|
||||||
|
|
||||||
|
class CieloTVItIE(SkyItIE):
    """Extractor for video pages on cielotv.it."""

    IE_NAME = 'cielotv.it'
    _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
    _DOMAIN = 'cielo'
    # The video id is read from a `videoId = "<digits>"` assignment in the page.
    _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
    _TESTS = [{
        'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
        'md5': 'c4deed77552ba901c2a0d9258320304b',
        'info_dict': {
            'id': '499240',
            'ext': 'mp4',
            'title': 'Il lunedì è sempre un dramma',
            'upload_date': '20190329',
            'timestamp': 1553862178,
        }
    }]
|
||||||
|
|
||||||
|
|
||||||
|
class TV8ItIE(SkyItVideoIE):
    """Extractor for tv8.it showvideo pages; the id comes from the URL itself."""

    IE_NAME = 'tv8.it'
    _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
    # Domain value substituted into the player URL for this site.
    _DOMAIN = 'mtv8'
    _TESTS = [{
        'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
        'md5': '9ab906a3f75ea342ed928442f9dabd21',
        'info_dict': {
            'id': '630529',
            'ext': 'mp4',
            'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
            'timestamp': 1605721374,
            'upload_date': '20201118',
        }
    }]
|
|
@ -2,7 +2,12 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import (
|
||||||
|
bool_or_none,
|
||||||
|
smuggle_url,
|
||||||
|
try_get,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SlidesLiveIE(InfoExtractor):
|
class SlidesLiveIE(InfoExtractor):
|
||||||
|
@ -18,8 +23,21 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
|
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
|
||||||
'uploader': 'SlidesLive Videos - A',
|
'uploader': 'SlidesLive Videos - A',
|
||||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||||
|
'timestamp': 1597615266,
|
||||||
'upload_date': '20170925',
|
'upload_date': '20170925',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# video_service_name = yoda
|
||||||
|
'url': 'https://slideslive.com/38935785',
|
||||||
|
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'RMraDYN5ozA_',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# video_service_name = youtube
|
# video_service_name = youtube
|
||||||
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
|
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
|
||||||
|
@ -39,18 +57,47 @@ class SlidesLiveIE(InfoExtractor):
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'https://ben.slideslive.com/player/' + video_id, video_id)
|
'https://ben.slideslive.com/player/' + video_id, video_id)
|
||||||
service_name = video_data['video_service_name'].lower()
|
service_name = video_data['video_service_name'].lower()
|
||||||
assert service_name in ('url', 'vimeo', 'youtube')
|
assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
|
||||||
service_id = video_data['video_service_id']
|
service_id = video_data['video_service_id']
|
||||||
|
subtitles = {}
|
||||||
|
for sub in try_get(video_data, lambda x: x['subtitles'], list) or []:
|
||||||
|
if not isinstance(sub, dict):
|
||||||
|
continue
|
||||||
|
webvtt_url = url_or_none(sub.get('webvtt_url'))
|
||||||
|
if not webvtt_url:
|
||||||
|
continue
|
||||||
|
lang = sub.get('language') or 'en'
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'url': webvtt_url,
|
||||||
|
})
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'thumbnail': video_data.get('thumbnail'),
|
'thumbnail': video_data.get('thumbnail'),
|
||||||
'url': service_id,
|
'is_live': bool_or_none(video_data.get('is_live')),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
if service_name == 'url':
|
if service_name in ('url', 'yoda'):
|
||||||
info['title'] = video_data['title']
|
info['title'] = video_data['title']
|
||||||
|
if service_name == 'url':
|
||||||
|
info['url'] = service_id
|
||||||
|
else:
|
||||||
|
formats = []
|
||||||
|
_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
_MANIFEST_PATTERN % (service_id, 'm3u8'), service_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
|
||||||
|
mpd_id='dash', fatal=False))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info.update({
|
||||||
|
'id': service_id,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
info.update({
|
info.update({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
|
'url': service_id,
|
||||||
'ie_key': service_name.capitalize(),
|
'ie_key': service_name.capitalize(),
|
||||||
'title': video_data.get('title'),
|
'title': video_data.get('title'),
|
||||||
})
|
})
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue