mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-16 23:35:45 +00:00
[LibriVox] Add new extractor
This commit is contained in:
parent
3b65a6fbf3
commit
aa016336a8
2 changed files with 49 additions and 0 deletions
|
@ -527,6 +527,7 @@ from .leeco import (
|
||||||
LetvCloudIE,
|
LetvCloudIE,
|
||||||
)
|
)
|
||||||
from .libraryofcongress import LibraryOfCongressIE
|
from .libraryofcongress import LibraryOfCongressIE
|
||||||
|
from .librivox import LibriVoxIE
|
||||||
from .libsyn import LibsynIE
|
from .libsyn import LibsynIE
|
||||||
from .lifenews import (
|
from .lifenews import (
|
||||||
LifeNewsIE,
|
LifeNewsIE,
|
||||||
|
|
48
youtube_dl/extractor/librivox.py
Normal file
48
youtube_dl/extractor/librivox.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
orderedSet
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LibriVoxIE(InfoExtractor):
    """Extract a LibriVox book page as a playlist of its MP3 chapters.

    The playlist id is the URL slug (``<title>-by-<author>``) and the
    playlist title is derived from that slug alone.  Entries are the
    archive.org MP3 download links found on the page, skipping files
    whose name ends in ``64kb``.
    """

    # The slug is split at its *last* "-by-" (the title group is greedy), so
    # titles that themselves contain "-by-" still work.  Neither part may
    # contain "/", "?" or "#", which keeps the id inside one path segment.
    # Single-word titles such as "dracula-by-bram-stoker" are accepted.
    _VALID_URL = r'https?://(?:www\.)?librivox\.org/(?P<id>(?P<title>[^/?#]+)-by-(?P<author>[^/?#]+))(?:[/?#]|$)'

    # Groups: 1 = full MP3 URL, 2 = file name without extension, 3 = link text.
    # "[^>]*>" stops at the end of the opening tag and "(.*?)" at the first
    # "</a>", so several anchors on one line are matched one by one instead
    # of being merged into a single bogus match.
    _CHAPTER_LINK_RE = r'<a href="(https?://(?:www\.)?archive\.org/download/[^/]*/([^\.]*(?<!(?:64kb)))\.mp3)"[^>]*>(.*?)</a>'

    _TESTS = [{
        'url': 'https://librivox.org/the-art-of-war-by-sun-tzu/',
        'info_dict': {
            'id': 'the-art-of-war-by-sun-tzu',
            'title': 'The Art Of War by Sun Tzu',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://librivox.org/alexander-the-great-by-jacob-abbott/',
        'info_dict': {
            'id': 'alexander-the-great-by-jacob-abbott',
            'title': 'Alexander The Great by Jacob Abbott',
        },
        'playlist_mincount': 12,
    }, {
        # Single-word title: only the URL pattern is checked.
        'url': 'https://librivox.org/dracula-by-bram-stoker/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result for the LibriVox book page at *url*."""
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        # Both display names come from the slug: hyphens become spaces and
        # every word is capitalised ("the-art-of-war" -> "The Art Of War").
        book_title = mobj.group('title').replace('-', ' ').strip().title()
        author = mobj.group('author').replace('-', ' ').strip().title()

        webpage = self._download_webpage(url, playlist_id)

        # orderedSet drops repeated (url, id, title) tuples while keeping
        # the order in which the links appear on the page.
        chapters = orderedSet(re.findall(self._CHAPTER_LINK_RE, webpage))
        entries = [
            self.url_result(
                chapter_url, video_id=chapter_id, video_title=chapter_title)
            for chapter_url, chapter_id, chapter_title in chapters
        ]

        return self.playlist_result(
            entries, playlist_id, book_title + ' by ' + author)
|
Loading…
Reference in a new issue