[VXXX] Fix the non-standard base164 encoding

2025-02-18 21:30:28 +00:00 · 2022-10-02 05:08:43 -04:00 · 2022-10-02 05:08:43 -04:00 · c0bda232e9
commit c0bda232e9
parent 6b7441ed64
1 changed file with 29 additions and 25 deletions
--- a/youtube_dl/extractor/vxxx.py
+++ b/youtube_dl/extractor/vxxx.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import base64
 import re
 from .common import InfoExtractor
@@ -10,7 +11,7 @@ from ..utils import unified_timestamp, parse_duration
 class VXXXIE(InfoExtractor):
    _VALID_URL = r'https?://vxxx\.com/video-(?P<id>\d+)'
    _TESTS = [{
-        'url': 'https://vxxx.com/video-80747',
+        'url': 'https://vxxx.com/video-80747/',
        'md5': '4736e868b0e008b4ff9dc09585c26c52',
        'info_dict': {
            'id': '80747',
@@ -33,7 +34,7 @@ class VXXXIE(InfoExtractor):
    def _download_info_object(self, video_id):
        return self._download_json(
            'https://vxxx.com/api/json/video/86400/0/{}/{}.json'.format(
-                int(video_id) // 10000 * 10000,
+                int(video_id) // 1000 * 1000,
                video_id,
            ), video_id, headers={'Referer': 'https://vxxx.com'})['video']
@@ -47,32 +48,34 @@ class VXXXIE(InfoExtractor):
    def _get_video_host(self):
        return 'vxxx.com'
-    def _decode_base164(self, text):
+    def _decode_base164(self, e):
-        alphabet = [*'АВСDЕFGHIJKLМNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,~']
+        """
-        bit_str = ''
+        Some non-standard encoding called "base164" in the JavaScript code. It
-        text_str = ''
+        is similar to base 64 with some alphabets replaced:
            - "АВСЕМ" are Cyrillic letters instead of uppercase English letters
            - "." is used instead of "+"; "," is used instead of "/"
            - "~" is used for padding instead of "="
        """
-        for char in text:
+        return base64.b64decode(e
-            if char in alphabet:
+                                .replace("А", "A")
-                bin_char = bin(alphabet.index(char)).lstrip("0b")
+                                .replace("В", "B")
-                bin_char = bin_char.zfill(6)
+                                .replace("С", "C")
-                bit_str += bin_char
+                                .replace("Е", "E")
-
+                                .replace("М", "M")
-        brackets = [bit_str[x:x + 8] for x in range(0, len(bit_str), 8)]
+                                .replace(".", "+")
-
+                                .replace(",", "/")
-        for bracket in brackets:
+                                .replace("~", "=")
-            text_str += chr(int(bracket, 2))
+                                ).decode()
        return text_str
    def _extract_info(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        matches = re.match(self._VALID_URL, url)
-        id = mobj.group('id')
+        video_id = matches.group('id')
-        info_object = self._download_info_object(id)
+        info_object = self._download_info_object(video_id)
        info = {
-            'id': id,
+            'id': video_id,
            'title': info_object['title'],
            'display_id': info_object['dir'],
            'thumbnail': info_object['thumb'],
@@ -88,11 +91,12 @@ class VXXXIE(InfoExtractor):
        }
        qualities = {
-            '_hd.mp4': -1,
+            '_fhd.mp4': -1,
-            '_sd.mp4': -2
+            '_hd.mp4': -2,
            '_sd.mp4': -3
        }
-        format_object = self._download_format_object(id)
+        format_object = self._download_format_object(video_id)
        formats = list(map(lambda f: {
            'url': "https://{}{}".format(
                self._get_video_host(),