1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-12-22 16:57:40 +00:00

Dailymotion: Use og:title instead of <title> to find title (Closes: #253)

This commit is contained in:
Philipp Hagemeister 2011-12-15 20:32:05 +01:00
parent 561504fffa
commit bb8abbbbae

View file

@@ -1591,6 +1591,8 @@ class DailymotionIE(InfoExtractor):
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
def _real_extract(self, url): def _real_extract(self, url):
htmlParser = HTMLParser.HTMLParser()
# Extract id and simplified title from URL # Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: if mobj is None:
@@ -1601,7 +1603,6 @@ class DailymotionIE(InfoExtractor):
self._downloader.increment_downloads() self._downloader.increment_downloads()
video_id = mobj.group(1) video_id = mobj.group(1)
simple_title = mobj.group(2).decode('utf-8')
video_extension = 'flv' video_extension = 'flv'
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
@@ -1631,12 +1632,13 @@ class DailymotionIE(InfoExtractor):
video_url = mediaURL video_url = mediaURL
mobj = re.search(r'(?im)<title>\s*(.+)\s*-\s*Video\s+Dailymotion</title>', webpage) mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
if mobj is None: if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title') self._downloader.trouble(u'ERROR: unable to extract title')
return return
video_title = mobj.group(1).decode('utf-8') video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8')
video_title = sanitize_title(video_title) video_title = sanitize_title(video_title)
simple_title = _simplify_title(video_title)
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage) mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
if mobj is None: if mobj is None: