diff --git a/devscripts/wine-py2exe.sh b/devscripts/wine-py2exe.sh index 319ffcbc8..dc2d6501a 100644 --- a/devscripts/wine-py2exe.sh +++ b/devscripts/wine-py2exe.sh @@ -18,7 +18,6 @@ if [ ! -d wine-py2exe ]; then axel -a "http://www.python.org/ftp/python/2.7/python-2.7.msi" axel -a "http://downloads.sourceforge.net/project/py2exe/py2exe/0.6.9/py2exe-0.6.9.win32-py2.7.exe" - axel -a "http://pypi.python.org/packages/2.7/l/lxml/lxml-2.3.win32-py2.7.exe" #axel -a "http://winetricks.org/winetricks" # http://appdb.winehq.org/objectManager.php?sClass=version&iId=21957 @@ -28,13 +27,9 @@ if [ ! -d wine-py2exe ]; then echo "Follow py2exe setup on screen" wine py2exe-0.6.9.win32-py2.7.exe - echo "Follow lxml setup on screen" - wine lxml-2.3.win32-py2.7.exe - #echo "Follow Microsoft Visual C++ 2008 Redistributable Package setup on screen" #bash winetricks vcrun2008 - rm lxml-2.3.win32-py2.7.exe rm py2exe-0.6.9.win32-py2.7.exe rm python-2.7.msi #rm winetricks diff --git a/youtube-dl b/youtube-dl index 7e4640c66..c4b5c07ca 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.exe b/youtube-dl.exe index ec793ecee..cb9654283 100755 Binary files a/youtube-dl.exe and b/youtube-dl.exe differ diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 4314f1402..d77154dcb 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -24,11 +24,6 @@ try: except ImportError: from cgi import parse_qs -try: - import lxml.etree -except ImportError: - pass # Handled below - try: import xml.etree.ElementTree except ImportError: # Python<2.5: Not officially supported, but let it slip @@ -193,8 +188,8 @@ class YoutubeIE(InfoExtractor): end = start + float(dur) start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000) end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000) - caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) - caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) # double cycle, inentional + caption = unescapeHTML(caption) + caption = unescapeHTML(caption) # double cycle, inentional srt += str(n) + '\n' srt += start + ' --> ' + end + '\n' srt += caption + '\n\n' @@ -364,18 +359,9 @@ class YoutubeIE(InfoExtractor): pass # description - try: - lxml.etree - except NameError: - video_description = u'No description available.' - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1).decode('utf-8') - else: - html_parser = lxml.etree.HTMLParser(encoding='utf-8') - vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) - video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) - # TODO use another parser + video_description = get_element_by_id("eow-description", video_webpage) + if video_description: video_description = clean_html(video_description.decode('utf8')) + else: video_description = '' # closed captions video_subtitles = None @@ -992,7 +978,7 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'ERROR: Unable to extract media URL') return video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8') - video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url) + video_url = unescapeHTML(video_url) return [{ 'id': video_id.decode('utf-8'), @@ -1069,18 +1055,9 @@ class VimeoIE(InfoExtractor): video_thumbnail = config["video"]["thumbnail"] # Extract video description - try: - lxml.etree - except NameError: - video_description = u'No description available.' - mobj = re.search(r'', webpage, re.MULTILINE) - if mobj is not None: - video_description = mobj.group(1) - else: - html_parser = lxml.etree.HTMLParser() - vwebpage_doc = lxml.etree.parse(StringIO.StringIO(webpage), html_parser) - video_description = u''.join(vwebpage_doc.xpath('id("description")//text()')).strip() - # TODO use another parser + video_description = get_element_by_id("description", webpage) + if video_description: video_description = clean_html(video_description.decode('utf8')) + else: video_description = '' # Extract upload date video_upload_date = u'NA' @@ -2248,8 +2225,6 @@ class EscapistIE(InfoExtractor): self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) def _real_extract(self, url): - htmlParser = HTMLParser.HTMLParser() - mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) @@ -2265,11 +2240,11 @@ class EscapistIE(InfoExtractor): return descMatch = re.search(' + html = html.replace('\n', ' ') + html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html) + # Strip html tags + html = re.sub('<.*?>', '', html) + # Replace html entities + html = unescapeHTML(html) + return html + + def sanitize_title(utitle): """Sanitizes a video title so it could be used as part of a filename.""" - utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) + utitle = unescapeHTML(utitle) return utitle.replace(unicode(os.sep), u'%') @@ -133,8 +210,8 @@ def unescapeHTML(s): """ assert type(s) == type(u'') - htmlParser = HTMLParser.HTMLParser() - return htmlParser.unescape(s) + result = re.sub(ur'(?u)&(.+?);', htmlentity_transform, s) + return result def encodeFilename(s): """