mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-25 11:41:52 +00:00
[youtube] URLs like youtube.com/NASA are now interpreted as users (fixes #1069)
Video URLs like http://youtube.com/BaW_jenozKc are not valid, but http://youtu.be/BaW_jenozKc is.
This commit is contained in:
parent
a636203ea5
commit
f4b052321b
2 changed files with 29 additions and 12 deletions
|
@ -11,6 +11,15 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
|
||||||
from helper import get_testcases
|
from helper import get_testcases
|
||||||
|
|
||||||
class TestAllURLsMatching(unittest.TestCase):
|
class TestAllURLsMatching(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.ies = gen_extractors()
|
||||||
|
|
||||||
|
def matching_ies(self, url):
|
||||||
|
return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
|
||||||
|
|
||||||
|
def assertMatch(self, url, ie_list):
|
||||||
|
self.assertEqual(self.matching_ies(url), ie_list)
|
||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
|
self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
|
self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
|
||||||
|
@ -24,12 +33,17 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
||||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||||
|
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||||
|
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||||
|
|
||||||
def test_youtube_channel_matching(self):
|
def test_youtube_channel_matching(self):
|
||||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
|
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
|
||||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
|
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
|
||||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
|
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
|
||||||
|
|
||||||
|
def test_youtube_user_matching(self):
|
||||||
|
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||||
|
|
||||||
def test_justin_tv_channelid_matching(self):
|
def test_justin_tv_channelid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
||||||
|
@ -63,15 +77,12 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
||||||
|
|
||||||
def test_keywords(self):
|
def test_keywords(self):
|
||||||
ies = gen_extractors()
|
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||||
matching_ies = lambda url: [ie.IE_NAME for ie in ies
|
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||||
if ie.suitable(url) and ie.IE_NAME != 'generic']
|
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
||||||
self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
|
self.assertMatch(':tds', ['ComedyCentral'])
|
||||||
self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
|
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
||||||
self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
|
self.assertMatch(':cr', ['ComedyCentral'])
|
||||||
self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
|
|
||||||
self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
|
|
||||||
self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -135,7 +135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
_VALID_URL = r"""^
|
_VALID_URL = r"""^
|
||||||
(
|
(
|
||||||
(?:https?://)? # http(s):// (optional)
|
(?:https?://)? # http(s):// (optional)
|
||||||
(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
|
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
|
||||||
tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
|
tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
|
@ -146,7 +146,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||||
v=
|
v=
|
||||||
)
|
)
|
||||||
)? # optional -> youtube.com/xxxx is OK
|
))
|
||||||
|
|youtu\.be/ # just youtu.be/xxxx
|
||||||
|
)
|
||||||
)? # all until now is optional -> you can pass the naked ID
|
)? # all until now is optional -> you can pass the naked ID
|
||||||
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
|
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
|
||||||
(?(1).+)? # if we found the ID, everything can follow
|
(?(1).+)? # if we found the ID, everything can follow
|
||||||
|
@ -1013,13 +1015,17 @@ class YoutubeChannelIE(InfoExtractor):
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
|
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
|
||||||
_GDATA_PAGE_SIZE = 50
|
_GDATA_PAGE_SIZE = 50
|
||||||
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
|
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
|
||||||
_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
|
_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
|
||||||
IE_NAME = u'youtube:user'
|
IE_NAME = u'youtube:user'
|
||||||
|
|
||||||
|
def suitable(cls, url):
|
||||||
|
if YoutubeIE.suitable(url): return False
|
||||||
|
else: return super(YoutubeUserIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract username
|
# Extract username
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
Loading…
Reference in a new issue