From 45088c6301a73e250118621812b331e3d6d8a948 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Mon, 16 Oct 2023 04:11:53 +0100
Subject: [PATCH] [XVideos] Add XVideosCategoryIE for category and tag pages

---
Notes (ignored by `git am`; not part of the commit):

  XVideosCategoryIE matches listing URLs of the form
      https://[<sub>.]xvideos<N>.com/{c|tags}[/<k>:<v>]*/<id>[/<pnum>]
  where <k> is one of d, m, q, s.

  * _VALID_URL: the named group `c` is set only for `/c/` URLs, and the
    conditional `(?(c)-\d+)` then requires the id to end in `-<digits>`
    (e.g. `ASMR-229`); for `/tags/` URLs the id is a bare `\w+`.
  * _get_playlist_id(playlist_id, url=..., pnum=...): re-matches `url`,
    then joins type, playlist_id, each `/k:v` filter and `pnum` with '/'.
    The expected ids in _TESTS (`tags/yiff`, `c/ASMR-229/m:month/2`) show
    that empty or missing parts are left out by join_nonempty().
  * _get_title(page, playlist_id): yields `Category:<name>` (the text
    before the last '-' of the id) or `Tag:<id>`; any remaining id parts
    follow in brackets with the first ':' shown as '=', and a trailing
    page number n shown as `p<n+1>` (`.../2` -> `p3`).

  NOTE(review): this copy of the patch had lost its line breaks and all
  `<...>` text.  Group names `type`, `sub` and `c` are fixed by the code
  that reads them; `id` and `pnum` are reconstructed - confirm against
  XVideosPlaylistBaseIE.  Context-line indentation and the address after
  "From: dirkf" could not be recovered exactly.

 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/xvideos.py    | 89 ++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 292c39c42..c3ef4933e 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1621,6 +1621,7 @@ from .xtube import XTubeUserIE, XTubeIE
 from .xuite import XuiteIE
 from .xvideos import (
     XVideosIE,
+    XVideosCategoryIE,
     XVideosChannelIE,
     XVideosPlaylistIE,
     XVideosRelatedIE,
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index b44df74a9..427d4c46b 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -471,6 +471,95 @@ class XVideosPlaylistIE(XVideosPlaylistBaseIE):
         return title
 
 
+class XVideosCategoryIE(XVideosPlaylistBaseIE):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:[^/]+\.)?xvideos\d*\.com/
+                        (?P<type>(?P<c>c)|tags)
+                        (?P<sub>(?:/[dmqs]:[\w-]+)*)/(?P<id>\w+(?(c)-\d+))
+                        (?:/(?P<pnum>\d+))?
+                 '''
+    _TESTS = [{
+        'note': 'videos in category for this month',
+        'url': 'https://www.xvideos.com/c/m:month/ASMR-229',
+        'info_dict': {
+            'id': 'c/ASMR-229/m:month',
+            'title': 'Category:ASMR (m=month)',
+        },
+        'playlist_mincount': 100,
+    }, {
+        'note': 'page 3 of videos in category for this month',
+        'url': 'https://www.xvideos.com/c/m:month/ASMR-229/2',
+        'info_dict': {
+            'id': 'c/ASMR-229/m:month/2',
+            'title': 'Category:ASMR (m=month,p3)',
+        },
+        'playlist_count': 27,
+    }, {
+        'note': 'videos tagged yiff',
+        'url': 'https://www.xvideos.com/tags/yiff',
+        'info_dict': {
+            'id': 'tags/yiff',
+            'title': 'Tag:yiff',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'note': 'page 3 of videos tagged yiff',
+        'url': 'https://www.xvideos.com/tags/yiff/2',
+        'info_dict': {
+            'id': 'tags/yiff/2',
+            'title': 'Tag:yiff (p3)',
+        },
+        'playlist_count': 27,
+    }, {
+        'note': 'long videos tagged yiff',
+        'url': 'https://www.xvideos.com/tags/d:10-20min/yiff',
+        'info_dict': {
+            'id': 'tags/yiff/d:10-20min',
+            'title': 'Tag:yiff (d=10-20min)',
+        },
+        'playlist_mincount': 20,
+        'playlist_maxcount': 40,
+    }, {
+        'note': 'videos tagged yiff, longest first',
+        'url': 'https://www.xvideos.com/tags/s:length/yiff',
+        'info_dict': {
+            'id': 'tags/yiff/s:length',
+            'title': 'Tag:yiff (s=length)',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': r're:\d+',
+                'ext': 'mp4',
+                'title': r're:\w+',
+                'uploader': r're:\w+',
+                'age_limit': int,
+                'duration': 'lambda c: c >= 1321'  # for video 38266161
+            },
+        }],
+    }]
+
+    def _get_playlist_id(self, playlist_id, **kwargs):
+        url = kwargs['url']
+        c_type, sub = self._match_valid_url(url).group('type', 'sub')
+        sub = sub.split('/')
+        sub.append(kwargs.get('pnum'))
+        return join_nonempty(c_type, playlist_id, *sub, delim='/')
+
+    def _get_title(self, page, playlist_id, **kwargs):
+        pl_id = playlist_id.split('/')
+        title = '%s:%s' % ((
+            'Category', pl_id[1].rsplit('-', 1)[0]) if pl_id[0] == 'c'
+            else ('Tag', pl_id[1]))
+        pnum = int_or_none(pl_id[-1])
+        if pnum:
+            pl_id[-1] = 'p%d' % (pnum + 1)
+        subs = ','.join(x.replace(':', '=', 1) for x in pl_id[2:])
+        if subs:
+            title = '%s (%s)' % (title, subs)
+        return title
+
+
 class XVideosChannelIE(XVideosPlaylistBaseIE):
     _VALID_URL = r'''(?x)
                     https?://