mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-16 14:55:45 +00:00
[safari:api] Separate extractor (Closes #8871)
This commit is contained in:
parent
782b1b5bd1
commit
3aec71766d
2 changed files with 32 additions and 21 deletions
|
@ -628,6 +628,7 @@ from .ruutu import RuutuIE
|
||||||
from .sandia import SandiaIE
|
from .sandia import SandiaIE
|
||||||
from .safari import (
|
from .safari import (
|
||||||
SafariIE,
|
SafariIE,
|
||||||
|
SafariApiIE,
|
||||||
SafariCourseIE,
|
SafariCourseIE,
|
||||||
)
|
)
|
||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
|
|
|
@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor):
|
||||||
class SafariIE(SafariBaseIE):
|
class SafariIE(SafariBaseIE):
|
||||||
IE_NAME = 'safari'
|
IE_NAME = 'safari'
|
||||||
IE_DESC = 'safaribooksonline.com online video'
|
IE_DESC = 'safaribooksonline.com online video'
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
|
||||||
(?:www\.)?safaribooksonline\.com/
|
|
||||||
(?:
|
|
||||||
library/view/[^/]+|
|
|
||||||
api/v1/book
|
|
||||||
)/
|
|
||||||
(?P<course_id>[^/]+)/
|
|
||||||
(?:chapter(?:-content)?/)?
|
|
||||||
(?P<part>part\d+)\.html
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
|
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
|
||||||
|
@ -97,9 +88,6 @@ class SafariIE(SafariBaseIE):
|
||||||
'upload_date': '20150724',
|
'upload_date': '20150724',
|
||||||
'uploader_id': 'stork',
|
'uploader_id': 'stork',
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
}, {
|
||||||
# non-digits in course id
|
# non-digits in course id
|
||||||
'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
|
'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
|
||||||
|
@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
course_id = mobj.group('course_id')
|
video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
|
||||||
part = mobj.group('part')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, '%s/%s' % (course_id, part))
|
webpage = self._download_webpage(url, video_id)
|
||||||
reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id')
|
reference_id = self._search_regex(
|
||||||
partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id')
|
r'data-reference-id=(["\'])(?P<id>.+?)\1',
|
||||||
ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id')
|
webpage, 'kaltura reference id', group='id')
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'data-partner-id=(["\'])(?P<id>.+?)\1',
|
||||||
|
webpage, 'kaltura widget id', group='id')
|
||||||
|
ui_id = self._search_regex(
|
||||||
|
r'data-ui-id=(["\'])(?P<id>.+?)\1',
|
||||||
|
webpage, 'kaltura uiconf id', group='id')
|
||||||
|
|
||||||
query = {
|
query = {
|
||||||
'wid': '_%s' % partner_id,
|
'wid': '_%s' % partner_id,
|
||||||
|
@ -125,7 +118,7 @@ class SafariIE(SafariBaseIE):
|
||||||
if self.LOGGED_IN:
|
if self.LOGGED_IN:
|
||||||
kaltura_session = self._download_json(
|
kaltura_session = self._download_json(
|
||||||
'%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
|
'%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
|
||||||
course_id, 'Downloading kaltura session JSON',
|
video_id, 'Downloading kaltura session JSON',
|
||||||
'Unable to download kaltura session JSON', fatal=False)
|
'Unable to download kaltura session JSON', fatal=False)
|
||||||
if kaltura_session:
|
if kaltura_session:
|
||||||
session = kaltura_session.get('session')
|
session = kaltura_session.get('session')
|
||||||
|
@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE):
|
||||||
'Kaltura')
|
'Kaltura')
|
||||||
|
|
||||||
|
|
||||||
|
class SafariApiIE(SafariBaseIE):
|
||||||
|
IE_NAME = 'safari:api'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
part = self._download_json(
|
||||||
|
url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
|
||||||
|
'Downloading part JSON')
|
||||||
|
return self.url_result(part['web_url'], SafariIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
class SafariCourseIE(SafariBaseIE):
|
class SafariCourseIE(SafariBaseIE):
|
||||||
IE_NAME = 'safari:course'
|
IE_NAME = 'safari:course'
|
||||||
IE_DESC = 'safaribooksonline.com online courses'
|
IE_DESC = 'safaribooksonline.com online courses'
|
||||||
|
@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE):
|
||||||
'No chapters found for course %s' % course_id, expected=True)
|
'No chapters found for course %s' % course_id, expected=True)
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(chapter, 'Safari')
|
self.url_result(chapter, SafariApiIE.ie_key())
|
||||||
for chapter in course_json['chapters']]
|
for chapter in course_json['chapters']]
|
||||||
|
|
||||||
course_title = course_json['title']
|
course_title = course_json['title']
|
||||||
|
|
Loading…
Reference in a new issue