From 9addbadd330b2dd86792aaedd9633ca2f5657a69 Mon Sep 17 00:00:00 2001 From: SpiderRider067 <36859584+SpiderRider067@users.noreply.github.com> Date: Thu, 31 Dec 2020 23:02:26 -0500 Subject: [PATCH] added speedrun author and timer optional (fatal=False) because the website's layout forced me into an unreliable regex. --- youtube_dl/extractor/tasvideos.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tasvideos.py b/youtube_dl/extractor/tasvideos.py index 4848ae5dd..f628d7c05 100644 --- a/youtube_dl/extractor/tasvideos.py +++ b/youtube_dl/extractor/tasvideos.py @@ -27,8 +27,13 @@ class TASVideosIE(InfoExtractor): r']+(?Parchive\.org\/download[^<]+\.(?:mkv|mp4|avi))[^<]+<\/a>', webpage) title = self._search_regex( - r'(?P[^<]+)<\/span>', webpage, - 'title') + r'<span title="Movie[^"]+">(?P<TITLE>[^<]+)<\/span>', + webpage, 'title') + time_and_author = self._html_search_regex( + r'<th.*<\/span>(?P<time_and_author>.*)<\/th>', webpage, + 'title: speedrun timer and credit', fatal=False) + if time_and_author is not None: + title = title + time_and_author formats = [] for url in video_urls: @@ -60,15 +65,20 @@ class TASVideosPlaylistIE(InfoExtractor): playlist_title = self._search_regex( r'<title>(?P<title>[^<]*)', webpage, 'title') video_entries = get_elements_by_class('item', webpage) - + entries = [] for entry in video_entries: video_urls = re.findall( r']+(?Parchive\.org\/download[^<]+\.(?:mkv|mp4|avi))[^<]+<\/a>', entry) title = self._search_regex( - r'(?P[^<]+)<\/span>', entry, - 'title') + r'<span title="Movie[^"]+">(?P<title>[^<]+)<\/span>', + entry, 'title') + time_and_author = self._html_search_regex( + r'<th.*<\/span>(?P<time_and_author>.*)<\/th>', entry, + 'time_and_author', fatal=False) + if time_and_author is not None: + title = title + time_and_author video_id = self._search_regex( r'id="movie_(?P<id>\d+)', entry, 'video id') + 'M'