1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-20 09:12:14 +00:00
youtube-dl/youtube_dl/extractor/vxxx.py

111 lines
4 KiB
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals

import base64
import re

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    int_or_none,
    parse_duration,
    unified_timestamp,
)
class VXXXIE(InfoExtractor):
    """Extractor for videos hosted on vxxx.com.

    Metadata is read from the site's JSON API.  The media is fetched as an
    m3u8 playlist whose path is returned by a second endpoint in an
    obfuscated form that the site's JavaScript calls "base164".
    """

    _VALID_URL = r'https?://vxxx\.com/video-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://vxxx.com/video-80747/',
        'md5': '2f4bfd829b682ff9e3da1bda71b81b81',
        'info_dict': {
            'id': '80747',
            'ext': 'mp4',
            'title': 'Monica Aka Selina',
            'display_id': 'monica-aka-selina',
            'thumbnail': 'https://tn.vxxx.com/contents/videos_screenshots/80000/80747/420x236/1.jpg',
            'description': '',
            'timestamp': 1607167706,
            'upload_date': '20201205',
            'duration': 2373.0,
            'categories': ['Anal', 'Asian', 'BDSM', 'Brunette', 'Toys',
                           'Fetish', 'HD', 'Interracial', 'MILF'],
            'age_limit': 18,
        }
    }]

    # (obfuscated, standard) character pairs that turn a "base164" string
    # back into regular base64.  The Cyrillic letters are written as escapes
    # because they are visually identical to their Latin counterparts.
    _BASE164_SUBSTITUTIONS = (
        ('\u0410', 'A'),  # CYRILLIC CAPITAL LETTER A
        ('\u0412', 'B'),  # CYRILLIC CAPITAL LETTER VE
        ('\u0421', 'C'),  # CYRILLIC CAPITAL LETTER ES
        ('\u0415', 'E'),  # CYRILLIC CAPITAL LETTER IE
        ('\u041c', 'M'),  # CYRILLIC CAPITAL LETTER EM
        ('.', '+'),
        (',', '/'),
        ('~', '='),
    )

    def _download_info_object(self, video_id):
        """Download the metadata JSON for video_id and return its 'video' entry."""
        # The API path contains the video id rounded down to a multiple of
        # 1000 followed by the id itself.  '%' formatting is used instead of
        # auto-numbered '{}' fields, which Python 2.6 does not support.
        return self._download_json(
            'https://vxxx.com/api/json/video/86400/0/%d/%s.json' % (
                int(video_id) // 1000 * 1000,
                video_id,
            ), video_id, headers={'Referer': 'https://vxxx.com'})['video']

    def _download_format_object(self, video_id):
        """Download and return the JSON describing the media files of video_id."""
        return self._download_json(
            'https://vxxx.com/api/videofile.php?video_id=%s' % video_id,
            video_id,
            headers={'Referer': 'https://vxxx.com'}
        )

    def _get_video_host(self):
        """Return the host name that the decoded video path is appended to."""
        return 'vxxx.com'

    def _decode_base164(self, e):
        """Decode a string in the site's non-standard "base164" encoding.

        The encoding is regular base64 with a slightly different alphabet:
        - the letters "A", "B", "C", "E" and "M" are replaced by the Cyrillic
          look-alikes U+0410, U+0412, U+0421, U+0415 and U+041C
        - "." is used instead of "+"; "," is used instead of "/"
        - "~" is used for padding instead of "="

        Returns the decoded payload as text.
        """
        for obfuscated, standard in self._BASE164_SUBSTITUTIONS:
            e = e.replace(obfuscated, standard)
        # Name the codec explicitly so the result does not depend on the
        # interpreter's default (ascii on Python 2, utf-8 on Python 3).
        return base64.b64decode(e).decode('utf-8')

    def _extract_info(self, url):
        """Build the info dict (metadata and formats) for the video at url.

        Only the id, the title and the formats are treated as mandatory;
        any other field that is missing or malformed is reported as None
        instead of aborting the extraction.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        info_object = self._download_info_object(video_id)
        title = info_object['title']

        statistics = info_object.get('statistics')
        if not isinstance(statistics, dict):
            statistics = {}

        # 'categories' is read as a mapping whose values carry a 'title';
        # a bare sequence of such entries is accepted as well.
        categories = info_object.get('categories') or {}
        if isinstance(categories, dict):
            categories = categories.values()
        category_titles = [
            category['title'] for category in categories
            if isinstance(category, dict) and category.get('title')]

        format_object = self._download_format_object(video_id)
        formats = self._extract_m3u8_formats(
            'https://%s%s&f=video.m3u8' % (
                self._get_video_host(),
                self._decode_base164(format_object[0]['video_url'])
            ),
            video_id, 'mp4')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'display_id': info_object.get('dir'),
            'thumbnail': info_object.get('thumb'),
            'description': info_object.get('description'),
            'timestamp': unified_timestamp(info_object.get('post_date')),
            'duration': parse_duration(info_object.get('duration')),
            'view_count': int_or_none(statistics.get('viewed')),
            'like_count': int_or_none(statistics.get('likes')),
            'dislike_count': int_or_none(statistics.get('dislikes')),
            'average_rating': float_or_none(statistics.get('rating')),
            'categories': category_titles,
            'age_limit': 18,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract the video at url, deferring to 'Generic' if no formats were found."""
        info = self._extract_info(url)
        if not info['formats']:
            return self.url_result(url, 'Generic')
        return info