From bf78e7d8a8de218f327b08657ff007170ee87743 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Sun, 25 Mar 2018 07:20:17 +0530 Subject: [PATCH 01/10] Added extractor for torrins.com --- youtube_dl/extractor/torrins.py | 160 ++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 youtube_dl/extractor/torrins.py diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py new file mode 100644 index 000000000..a8b247c89 --- /dev/null +++ b/youtube_dl/extractor/torrins.py @@ -0,0 +1,160 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_kwargs, + compat_str, + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( + determine_ext, + extract_attributes, + ExtractorError, + float_or_none, + int_or_none, + js_to_json, + sanitized_Request, + unescapeHTML, + urlencode_postdata, +) + + +class TorrinsIE(InfoExtractor): + IE_NAME = 'torrins' + _VALID_URL = r'''(?x) + https?:// + www\.torrins\.com/(?:guitar|piano|bass)-lessons/(?:song-lessons|style-genre)/[^/]+/(?P[^/]+)/(?P[^(\.)]+)\.html + ''' + _LOGIN_URL = 'https://www.torrins.com/services/user/sign-in' + _ORIGIN_URL = 'https://www.torrins.com' + _NETRC_MACHINE = 'torrins' + + _TESTS = [{ + 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', + 'username': 'anirudha1987@gmail.com', + 'password': 'hello123', + 'info_dict': { + 'id': 'another-brick-in-the-wall', + 'lesson_id': 'song-demo', + 'ext': 'mp4', + 'title': 'Another Brick in the Wall Guitar - Song Demo', + 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', + 'duration': 579.29, + }, + 'skip': 'Requires torrins premium account credentials', + }, { + # new URL schema + 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', + 'only_matching': True, + }, { + # no url in outputs format entry + 'url': 'https://www.torrins.com/guitar-lessons/style-genre/fingerstyle/tone-production.html', + 'only_matching': True, + }] + + def _handle_error(self, response): + if not isinstance(response, dict): + return + error = response.get('error') + if error: + error_str = 'Torrins returned error #%s: %s' % (error.get('code'), error.get('message')) + error_data = error.get('data') + if error_data: + error_str += ' - %s' % error_data.get('formErrors') + raise ExtractorError(error_str, expected=True) + + def _download_webpage(self, *args, **kwargs): + kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + return super(TorrinsIE, self)._download_webpage( + *args, **compat_kwargs(kwargs)) + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_popup = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(reason): + webpage = self._download_webpage(self._ORIGIN_URL, None, reason) + + return any(re.search(p, webpage) for p in ( + r'id=["\'](?:bt-logout)', + r'>Logout<')) + + # already logged in + if is_logged('Checking if already logged in'): + return + + #login_form = self._form_hidden_inputs('login-form', login_popup) + + login_form = { + 'email': username, + 'password': password, + } + + response = self._download_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={ + 'Referer': self._ORIGIN_URL, + 'Origin': self._ORIGIN_URL, + }) + + if not is_logged('Post login check'): + error = self._html_search_regex( + r'(?s)]+class="form-errors[^"]*">(.+?)', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + def _real_extract(self, url): + course_id = self._match_id(url) + + webpage = self._download_webpage(url, course_id) + + title = self._og_search_title(webpage) + + video_json = self._html_search_regex(r"
Date: Sun, 25 Mar 2018 07:45:11 +0530 Subject: [PATCH 02/10] Added torrins to extractor list --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de48a37ad..b0f2ef7e7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1094,6 +1094,7 @@ from .tnaflix import ( from .toggle import ToggleIE from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE +from .torrins import TorrinsIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE From bc7d79b933bce052b70032c2d448ed03c6d088e4 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Sun, 25 Mar 2018 07:50:21 +0530 Subject: [PATCH 03/10] [torrins] removed unnecessary stuff --- youtube_dl/extractor/torrins.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index a8b247c89..70a0b31fa 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -35,8 +35,6 @@ class TorrinsIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', - 'username': 'anirudha1987@gmail.com', - 'password': 'hello123', 'info_dict': { 'id': 'another-brick-in-the-wall', 'lesson_id': 'song-demo', From 5da4714716cdc138f91c88169de47db4a403be7f Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Sun, 25 Mar 2018 08:17:54 +0530 Subject: [PATCH 04/10] [torrins] cleanup --- youtube_dl/extractor/torrins.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index 70a0b31fa..a9a79a0ec 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -4,21 +4,10 @@ import re from .common import InfoExtractor from ..compat import ( - compat_HTTPError, compat_kwargs, - compat_str, - compat_urllib_request, - compat_urlparse, ) from ..utils import ( - determine_ext, - extract_attributes, ExtractorError, - float_or_none, - int_or_none, - js_to_json, - sanitized_Request, - unescapeHTML, urlencode_postdata, ) @@ -78,7 +67,7 @@ class TorrinsIE(InfoExtractor): if username is None: return - login_popup = self._download_webpage( + self._download_webpage( self._LOGIN_URL, None, 'Downloading login popup') def is_logged(reason): @@ -92,8 +81,6 @@ class TorrinsIE(InfoExtractor): if is_logged('Checking if already logged in'): return - #login_form = self._form_hidden_inputs('login-form', login_popup) - login_form = { 'email': username, 'password': password, @@ -122,7 +109,7 @@ class TorrinsIE(InfoExtractor): title = self._og_search_title(webpage) - video_json = self._html_search_regex(r"
Date: Sun, 25 Mar 2018 08:41:44 +0530 Subject: [PATCH 05/10] [torrins] updated test --- youtube_dl/extractor/torrins.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index a9a79a0ec..d35ba714a 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -22,7 +22,7 @@ class TorrinsIE(InfoExtractor): _ORIGIN_URL = 'https://www.torrins.com' _NETRC_MACHINE = 'torrins' - _TESTS = [{ + _TEST = { 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', 'info_dict': { 'id': 'another-brick-in-the-wall', @@ -33,15 +33,7 @@ class TorrinsIE(InfoExtractor): 'duration': 579.29, }, 'skip': 'Requires torrins premium account credentials', - }, { - # new URL schema - 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', - 'only_matching': True, - }, { - # no url in outputs format entry - 'url': 'https://www.torrins.com/guitar-lessons/style-genre/fingerstyle/tone-production.html', - 'only_matching': True, - }] + } def _handle_error(self, response): if not isinstance(response, dict): From 8d5b553ed1cf447ea54d7fee6f3b84d87ea19b46 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Sun, 25 Mar 2018 09:46:38 +0530 Subject: [PATCH 06/10] [torrins] fixed codes for required metafields --- youtube_dl/extractor/torrins.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index d35ba714a..c573c2455 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -105,24 +105,24 @@ class TorrinsIE(InfoExtractor): video_json = self._parse_json(video_json, course_id) - title = video_json.get('title') - video_id = video_json.get('id') + title = video_json['title'] + video_id = video_json['id'] formats = [ { - 'url': video_json.get('low'), + 'url': video_json['low'], 'format_id': 'low', 'height': 240, 'ext': 'mp4' }, { - 'url': video_json.get('medium'), + 'url': video_json['medium'], 'format_id': 'medium', 'height': 360, 'ext': 'mp4' }, { - 'url': video_json.get('high'), + 'url': video_json['high'], 'format_id': 'high', 'height': 480, 'ext': 'mp4' From 995cb53e408bde8f13a798cfadbd7f433eada6b7 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Mon, 26 Mar 2018 21:35:23 +0530 Subject: [PATCH 07/10] [torrins] fixed regex et al. --- youtube_dl/extractor/torrins.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index c573c2455..7cc84ed26 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -16,7 +16,7 @@ class TorrinsIE(InfoExtractor): IE_NAME = 'torrins' _VALID_URL = r'''(?x) https?:// - www\.torrins\.com/(?:guitar|piano|bass)-lessons/(?:song-lessons|style-genre)/[^/]+/(?P[^/]+)/(?P[^(\.)]+)\.html + www\.torrins\.com/(?:guitar|piano|bass)-lessons/(?:song-lessons|style-genre)/[^/]+/(?P[^/]+)/[^\.]+\.html ''' _LOGIN_URL = 'https://www.torrins.com/services/user/sign-in' _ORIGIN_URL = 'https://www.torrins.com' @@ -26,7 +26,6 @@ class TorrinsIE(InfoExtractor): 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', 'info_dict': { 'id': 'another-brick-in-the-wall', - 'lesson_id': 'song-demo', 'ext': 'mp4', 'title': 'Another Brick in the Wall Guitar - Song Demo', 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', @@ -38,12 +37,12 @@ class TorrinsIE(InfoExtractor): def _handle_error(self, response): if not isinstance(response, dict): return - error = response.get('error') + error = response['error'] if error: - error_str = 'Torrins returned error #%s: %s' % (error.get('code'), error.get('message')) - error_data = error.get('data') + error_str = 'Torrins returned error #%s: %s' % (error['code'], error.get['message']) + error_data = error['data'] if error_data: - error_str += ' - %s' % error_data.get('formErrors') + error_str += ' - %s' % error_data['formErrors'] raise ExtractorError(error_str, expected=True) def _download_webpage(self, *args, **kwargs): @@ -59,9 +58,6 @@ class TorrinsIE(InfoExtractor): if username is None: return - self._download_webpage( - self._LOGIN_URL, None, 'Downloading login popup') - def is_logged(reason): webpage = self._download_webpage(self._ORIGIN_URL, None, reason) @@ -99,13 +95,10 @@ class TorrinsIE(InfoExtractor): webpage = self._download_webpage(url, course_id) - title = self._og_search_title(webpage) - video_json = self._html_search_regex(r"
Date: Mon, 26 Mar 2018 22:28:05 +0530 Subject: [PATCH 08/10] [torrins] fixed formats handling --- youtube_dl/extractor/torrins.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index 7cc84ed26..8185afb1a 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -101,26 +101,31 @@ class TorrinsIE(InfoExtractor): title = video_json.get('title') or self._og_search_title(webpage) video_id = video_json['id'] - formats = [ - { + formats = [] + + if 'low' in video_json: + formats.append({ 'url': video_json['low'], 'format_id': 'low', 'height': 240, 'ext': 'mp4' - }, - { + }) + + if 'medium' in video_json: + formats.append({ 'url': video_json['medium'], 'format_id': 'medium', 'height': 360, 'ext': 'mp4' - }, - { + }) + + if 'high' in video_json: + formats.append({ 'url': video_json['high'], 'format_id': 'high', 'height': 480, 'ext': 'mp4' - } - ] + }) return { 'id': video_id, From 83ab02db3ef479ffc2b095f85ce82461b0b042f6 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Mon, 26 Mar 2018 23:17:27 +0530 Subject: [PATCH 09/10] [torrins] removed code duplication --- youtube_dl/extractor/torrins.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index 8185afb1a..dfe7324ad 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -103,29 +103,14 @@ class TorrinsIE(InfoExtractor): formats = [] - if 'low' in video_json: - formats.append({ - 'url': video_json['low'], - 'format_id': 'low', - 'height': 240, - 'ext': 'mp4' - }) - - if 'medium' in video_json: - formats.append({ - 'url': video_json['medium'], - 'format_id': 'medium', - 'height': 360, - 'ext': 'mp4' - }) - - if 'high' in video_json: - formats.append({ - 'url': video_json['high'], - 'format_id': 'high', - 'height': 480, - 'ext': 'mp4' - }) + for format, height in {'low': 240, 'medium': 360, 'high': 480}.items(): + if format in video_json: + formats.append({ + 'url': video_json['low'], + 'format_id': format, + 'height': height, + 'ext': 'mp4' + }) return { 'id': video_id, From de60d4966c59bcefddc6d0651d8003a78fde8771 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Wed, 28 Mar 2018 02:35:47 +0530 Subject: [PATCH 10/10] [torrins] Removed function override --- youtube_dl/extractor/torrins.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py index dfe7324ad..77a8584b2 100644 --- a/youtube_dl/extractor/torrins.py +++ b/youtube_dl/extractor/torrins.py @@ -3,9 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_kwargs, -) + from ..utils import ( ExtractorError, urlencode_postdata, @@ -45,11 +43,6 @@ class TorrinsIE(InfoExtractor): error_str += ' - %s' % error_data['formErrors'] raise ExtractorError(error_str, expected=True) - def _download_webpage(self, *args, **kwargs): - kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' - return super(TorrinsIE, self)._download_webpage( - *args, **compat_kwargs(kwargs)) - def _real_initialize(self): self._login()