2022-10-02 07:11:53 +00:00
|
|
|
|
# coding: utf-8
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
2022-10-02 09:08:43 +00:00
|
|
|
|
import base64
|
2022-10-02 07:11:53 +00:00
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
from .common import InfoExtractor
|
2022-10-29 05:54:55 +00:00
|
|
|
|
from ..utils import (
    float_or_none,
    int_or_none,
    parse_duration,
    strip_or_none,
    unified_timestamp,
    url_or_none,
)
|
2022-10-02 07:11:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class VXXXIE(InfoExtractor):
    # Extractor for single video pages matched by _VALID_URL.
    #
    # NOTE(review): the methods below read self._BASE_URL,
    # self._INFO_OBJECT_URL_TMPL and self._FORMAT_OBJECT_URL_TMPL, but none of
    # them is assigned anywhere in this class body -- confirm where they are
    # meant to be defined before relying on this extractor.
    _VALID_URL = r'https?://vxxx\.com/video-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://vxxx.com/video-80747/',
        'md5': '2f4bfd829b682ff9e3da1bda71b81b81',
        'info_dict': {
            'id': '80747',
            'ext': 'mp4',
            'title': 'Monica Aka Selina',
            'display_id': 'monica-aka-selina',
            'thumbnail': 'https://tn.vxxx.com/contents/videos_screenshots/80000/80747/420x236/1.jpg',
            'description': '',
            'timestamp': 1607167706,
            'upload_date': '20201205',
            'duration': 2373.0,
            'categories': ['Anal', 'Asian', 'BDSM', 'Brunette', 'Toys',
                           'Fetish', 'HD', 'Interracial', 'MILF'],
            'age_limit': 18,
        }
    }]

    # Translation table (code point -> replacement character) turning the
    # site's "base164" alphabet into standard base64. The Cyrillic look-alikes
    # of A, B, C, E and M are written as escapes so that they cannot be
    # confused with (or silently replaced by) the Latin letters they resemble.
    # Built once at class creation; a mapping keyed by ordinals is what
    # translate() expects for text strings on both Python 2 and Python 3.
    _BASE164_TRANS = dict(zip(
        map(ord, '\u0410\u0412\u0421\u0415\u041c.,~'),
        'ABCEM+/='))

    def _download_info_object(self, video_id):
        """Download the metadata JSON for video_id and return its 'video' member.

        The URL is built from _INFO_OBJECT_URL_TMPL with three positional
        fields: the base URL, the video id rounded down to a multiple of
        1000, and the video id itself. The base URL is sent as Referer.
        """
        return self._download_json(
            self._INFO_OBJECT_URL_TMPL.format(
                self._BASE_URL,
                int(video_id) // 1000 * 1000,
                video_id,
            ), video_id, headers={'Referer': self._BASE_URL})['video']

    def _download_format_object(self, video_id):
        """Download and return the parsed format JSON for video_id.

        The URL is built from _FORMAT_OBJECT_URL_TMPL with the base URL and
        the video id; the base URL is sent as Referer.
        """
        return self._download_json(
            self._FORMAT_OBJECT_URL_TMPL.format(self._BASE_URL, video_id),
            video_id,
            headers={'Referer': self._BASE_URL}
        )

    @classmethod
    def _get_video_host(cls):
        """Return whatever follows the last '//' in _BASE_URL."""
        # or use the proper Python URL parsing functions
        return cls._BASE_URL.split('//')[-1]

    def _decode_base164(self, e):
        """
        Some non-standard encoding called "base164" in the JavaScript code. It's
        similar to the regular base64 with a slightly different alphabet:
        - the Cyrillic look-alikes of "ABCEM" (U+0410, U+0412, U+0421, U+0415,
          U+041C) are used instead of those uppercase Latin letters
        - "." is used instead of "+"; "," is used instead of "/"
        - "~" is used for padding instead of "="

        e is the encoded text; the decoded bytes are returned as text.
        """
        # Map the custom alphabet back to standard base64 in a single pass.
        return base64.b64decode(e.translate(self._BASE164_TRANS)).decode()

    def _extract_info(self, url):
        """Build and return the info dict (metadata plus formats) for url.

        Only 'title' is read with a mandatory lookup; every other metadata
        field is read with .get() and passed through a tolerant converter.
        """
        video_id = self._match_id(url)

        info_object = self._download_info_object(video_id)

        title = info_object['title']
        stats = info_object.get('statistics') or {}
        info = {
            'id': video_id,
            'title': title,
            'display_id': info_object.get('dir'),
            'thumbnail': url_or_none(info_object.get('thumb')),
            # info_object is subscripted and .get()-ed like a dict everywhere
            # else, so it must be queried here too, not called.
            'description': strip_or_none(info_object.get('description')) or None,
            'timestamp': unified_timestamp(info_object.get('post_date')),
            'duration': parse_duration(info_object.get('duration')),
            'view_count': int_or_none(stats.get('viewed')),
            'like_count': int_or_none(stats.get('likes')),
            'dislike_count': int_or_none(stats.get('dislikes')),
            'average_rating': float_or_none(stats.get('rating')),
            # 'categories' is iterated via .values(); entries without a
            # truthy 'title' are skipped.
            'categories': [
                category['title']
                for category in (info_object.get('categories') or {}).values()
                if category.get('title')],
            'age_limit': 18,
        }

        format_object = self._download_format_object(video_id)
        # The first entry's 'video_url' is "base164"-encoded; once decoded it
        # is appended to the host to form the HLS manifest URL.
        m3u8_formats = self._extract_m3u8_formats(
            'https://{0}{1}&f=video.m3u8'.format(
                self._get_video_host(),
                self._decode_base164(format_object[0]['video_url'])
            ),
            video_id, 'mp4')
        self._sort_formats(m3u8_formats)
        info['formats'] = m3u8_formats

        return info

    def _real_extract(self, url):
        """Return the extracted info dict, or delegate to 'Generic' if no formats were found."""
        info = self._extract_info(url)

        if not info['formats']:
            return self.url_result(url, 'Generic')

        return info
|