From 39baacc49f323adc639d502d38a016ebd63acd75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Sep 2013 12:45:53 +0200 Subject: [PATCH] [dailymotion] Add an extractor for users (closes #1476) --- test/test_playlists.py | 16 ++++++++++- youtube_dl/extractor/__init__.py | 6 +++- youtube_dl/extractor/dailymotion.py | 44 +++++++++++++++++++++++------ 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index d079a4f23..e22054d69 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# encoding: utf-8 import sys import unittest @@ -8,7 +9,13 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE +from youtube_dl.extractor import ( + DailymotionPlaylistIE, + DailymotionUserIE, + VimeoChannelIE, + UstreamChannelIE, + SoundcloudUserIE, +) from youtube_dl.utils import * from helper import FakeYDL @@ -25,6 +32,13 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['title'], u'SPORT') self.assertTrue(len(result['entries']) > 20) + def test_dailymotion_user(self): + dl = FakeYDL() + ie = DailymotionUserIE(dl) + result = ie.extract('http://www.dailymotion.com/user/generation-quoi/') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'Génération Quoi') + self.assertTrue(len(result['entries']) >= 26) def test_vimeo_channel(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c6a55f194..949f59a44 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -18,7 +18,11 @@ from .comedycentral import ComedyCentralIE from .condenast import CondeNastIE from .criterion import CriterionIE from .cspan import CSpanIE -from .dailymotion import DailymotionIE, DailymotionPlaylistIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionUserIE, +) from .daum import DaumIE from .depositfiles import DepositFilesIE from .dotsub import DotsubIE diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index ce7057a26..64b89aae8 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -114,28 +114,54 @@ class DailymotionIE(SubtitlesInfoExtractor): class DailymotionPlaylistIE(InfoExtractor): + IE_NAME = u'dailymotion:playlist' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' _MORE_PAGES_INDICATOR = r'' + _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + def _extract_entries(self, id): video_ids = [] - for pagenum in itertools.count(1): - webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum), - playlist_id, u'Downloading page %s' % pagenum) + webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum), + id, u'Downloading page %s' % pagenum) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break - - entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') for video_id in video_ids] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + return {'_type': 'playlist', 'id': playlist_id, 'title': get_element_by_id(u'playlist_name', webpage), - 'entries': entries, + 'entries': self._extract_entries(playlist_id), } + + +class DailymotionUserIE(DailymotionPlaylistIE): + IE_NAME = u'dailymotion:user' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P[^/]+)' + _MORE_PAGES_INDICATOR = r'' + _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + webpage = self._download_webpage(url, user) + full_user = self._html_search_regex( + r'(.*?)