1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-22 02:01:50 +00:00

added speedrun author and timer

The match is optional (fatal=False) because the website's layout forced me into
an unreliable regex.
This commit is contained in:
SpiderRider067 2020-12-31 23:02:26 -05:00 committed by GitHub
parent 14834bf977
commit 9addbadd33
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -27,8 +27,13 @@ class TASVideosIE(InfoExtractor):
r'<a [^>]+(?P<URL>archive\.org\/download[^<]+\.(?:mkv|mp4|avi))[^<]+<\/a>', r'<a [^>]+(?P<URL>archive\.org\/download[^<]+\.(?:mkv|mp4|avi))[^<]+<\/a>',
webpage) webpage)
title = self._search_regex( title = self._search_regex(
r'<span title="Movie[^"]+">(?P<TITLE>[^<]+)<\/span>', webpage, r'<span title="Movie[^"]+">(?P<TITLE>[^<]+)<\/span>',
'title') webpage, 'title')
time_and_author = self._html_search_regex(
r'<th.*<\/span>(?P<time_and_author>.*)<\/th>', webpage,
'title: speedrun timer and credit', fatal=False)
if time_and_author is not None:
title = title + time_and_author
formats = [] formats = []
for url in video_urls: for url in video_urls:
@ -60,15 +65,20 @@ class TASVideosPlaylistIE(InfoExtractor):
playlist_title = self._search_regex( playlist_title = self._search_regex(
r'<title>(?P<title>[^<]*)</title>', webpage, 'title') r'<title>(?P<title>[^<]*)</title>', webpage, 'title')
video_entries = get_elements_by_class('item', webpage) video_entries = get_elements_by_class('item', webpage)
entries = [] entries = []
for entry in video_entries: for entry in video_entries:
video_urls = re.findall( video_urls = re.findall(
r'<a [^>]+(?P<URL>archive\.org\/download[^<]+\.(?:mkv|mp4|avi))[^<]+<\/a>', r'<a [^>]+(?P<URL>archive\.org\/download[^<]+\.(?:mkv|mp4|avi))[^<]+<\/a>',
entry) entry)
title = self._search_regex( title = self._search_regex(
r'<span title="Movie[^"]+">(?P<title>[^<]+)<\/span>', entry, r'<span title="Movie[^"]+">(?P<title>[^<]+)<\/span>',
'title') entry, 'title')
time_and_author = self._html_search_regex(
r'<th.*<\/span>(?P<time_and_author>.*)<\/th>', entry,
'time_and_author', fatal=False)
if time_and_author is not None:
title = title + time_and_author
video_id = self._search_regex( video_id = self._search_regex(
r'id="movie_(?P<id>\d+)', entry, 'video id') + 'M' r'id="movie_(?P<id>\d+)', entry, 'video id') + 'M'