1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-16 22:36:27 +00:00

[VXXX] Fix the non-standard base164 encoding

This commit is contained in:
tabjy 2022-10-02 05:08:43 -04:00
parent 6b7441ed64
commit c0bda232e9

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -10,7 +11,7 @@ from ..utils import unified_timestamp, parse_duration
class VXXXIE(InfoExtractor): class VXXXIE(InfoExtractor):
_VALID_URL = r'https?://vxxx\.com/video-(?P<id>\d+)' _VALID_URL = r'https?://vxxx\.com/video-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://vxxx.com/video-80747', 'url': 'https://vxxx.com/video-80747/',
'md5': '4736e868b0e008b4ff9dc09585c26c52', 'md5': '4736e868b0e008b4ff9dc09585c26c52',
'info_dict': { 'info_dict': {
'id': '80747', 'id': '80747',
@ -33,7 +34,7 @@ class VXXXIE(InfoExtractor):
def _download_info_object(self, video_id): def _download_info_object(self, video_id):
return self._download_json( return self._download_json(
'https://vxxx.com/api/json/video/86400/0/{}/{}.json'.format( 'https://vxxx.com/api/json/video/86400/0/{}/{}.json'.format(
int(video_id) // 10000 * 10000, int(video_id) // 1000 * 1000,
video_id, video_id,
), video_id, headers={'Referer': 'https://vxxx.com'})['video'] ), video_id, headers={'Referer': 'https://vxxx.com'})['video']
@ -47,32 +48,34 @@ class VXXXIE(InfoExtractor):
def _get_video_host(self): def _get_video_host(self):
return 'vxxx.com' return 'vxxx.com'
def _decode_base164(self, text): def _decode_base164(self, e):
alphabet = [*'АВСDЕFGHIJKLМNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,~'] """
bit_str = '' Some non-standard encoding called "base164" in the JavaScript code. It
text_str = '' is similar to base 64 with some alphabets replaced:
- "АВСЕМ" are Cyrillic letters instead of uppercase English letters
- "." is used instead of "+"; "," is used instead of "/"
- "~" is used for padding instead of "="
"""
for char in text: return base64.b64decode(e
if char in alphabet: .replace("А", "A")
bin_char = bin(alphabet.index(char)).lstrip("0b") .replace("В", "B")
bin_char = bin_char.zfill(6) .replace("С", "C")
bit_str += bin_char .replace("Е", "E")
.replace("М", "M")
brackets = [bit_str[x:x + 8] for x in range(0, len(bit_str), 8)] .replace(".", "+")
.replace(",", "/")
for bracket in brackets: .replace("~", "=")
text_str += chr(int(bracket, 2)) ).decode()
return text_str
def _extract_info(self, url): def _extract_info(self, url):
mobj = re.match(self._VALID_URL, url) matches = re.match(self._VALID_URL, url)
id = mobj.group('id') video_id = matches.group('id')
info_object = self._download_info_object(id) info_object = self._download_info_object(video_id)
info = { info = {
'id': id, 'id': video_id,
'title': info_object['title'], 'title': info_object['title'],
'display_id': info_object['dir'], 'display_id': info_object['dir'],
'thumbnail': info_object['thumb'], 'thumbnail': info_object['thumb'],
@ -88,11 +91,12 @@ class VXXXIE(InfoExtractor):
} }
qualities = { qualities = {
'_hd.mp4': -1, '_fhd.mp4': -1,
'_sd.mp4': -2 '_hd.mp4': -2,
'_sd.mp4': -3
} }
format_object = self._download_format_object(id) format_object = self._download_format_object(video_id)
formats = list(map(lambda f: { formats = list(map(lambda f: {
'url': "https://{}{}".format( 'url': "https://{}{}".format(
self._get_video_host(), self._get_video_host(),