youtube-dl/youtube_dl/extractor/blerp.py

# coding: utf-8
from __future__ import unicode_literals

import json

from ..utils import (
    strip_or_none,
    traverse_obj,
)
from .common import InfoExtractor


class BlerpIE(InfoExtractor):
    IE_NAME = 'blerp'
    _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
        'info_dict': {
            'id': '6320fe8745636cb4dd677a5a',
            'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
            'uploader': 'luminousaj',
            'uploader_id': '5fb81e51aa66ae000c395478',
            'ext': 'mp3',
            'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
        }
    }, {
        'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
        'info_dict': {
            'id': '5bc94ef4796001000498429f',
            'title': 'Yee',
            'uploader': '179617322678353920',
            'uploader_id': '5ba99cf71386730004552c42',
            'ext': 'mp3',
            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
        }
    }]

    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
    _GRAPHQL_QUERY = (
        '''query webBitePageGetBite($_id: MongoID!) {
            web {
                biteById(_id: $_id) {
                    ...bitePageFrag
                    __typename
                }
                __typename
            }
        }

        fragment bitePageFrag on Bite {
            _id
            title
            userKeywords
            keywords
            color
            visibility
            isPremium
            owned
            price
            extraReview
            isAudioExists
            image {
                filename
                original {
                    url
                    __typename
                }
                __typename
            }
            userReactions {
                _id
                reactions
                createdAt
                __typename
            }
            topReactions
            totalSaveCount
            saved
            blerpLibraryType
            license
            licenseMetaData
            playCount
            totalShareCount
            totalFavoriteCount
            totalAddedToBoardCount
            userCategory
            userAudioQuality
            audioCreationState
            transcription
            userTranscription
            description
            createdAt
            updatedAt
            author
            listingType
            ownerObject {
                _id
                username
                profileImage {
                    filename
                    original {
                        url
                        __typename
                    }
                    __typename
                }
                __typename
            }
            transcription
            favorited
            visibility
            isCurated
            sourceUrl
            audienceRating
            strictAudienceRating
            ownerId
            reportObject {
                reportedContentStatus
                __typename
            }
            giphy {
                mp4
                gif
                __typename
            }
            audio {
                filename
                original {
                    url
                    __typename
                }
                mp3 {
                    url
                    __typename
                }
                __typename
            }
            __typename
        }

        ''')

    def _real_extract(self, url):
        audio_id = self._match_id(url)

        data = {
            'operationName': self._GRAPHQL_OPERATIONNAME,
            'query': self._GRAPHQL_QUERY,
            'variables': {
                '_id': audio_id
            }
        }

        headers = {
            'Content-Type': 'application/json'
        }

        json_result = self._download_json('https://api.blerp.com/graphql',
                                          audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)

        bite_json = json_result['data']['web']['biteById']

        info_dict = {
            'id': bite_json['_id'],
            'url': bite_json['audio']['mp3']['url'],
            'title': bite_json['title'],
            'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
            'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
            'ext': 'mp3',
            'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
        }

        return info_dict
[Blerp] Add new extractor (#31398) Co-authored-by: dirkf <fieldhouse@gmx.net> 2023-02-02 23:03:09 +05:30			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import json`

			`from ..utils import (`
			`strip_or_none,`
			`traverse_obj,`
			`)`
			`from .common import InfoExtractor`


			`class BlerpIE(InfoExtractor):`
			`IE_NAME = 'blerp'`
			`_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'`
			`_TESTS = [{`
			`'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',`
			`'info_dict': {`
			`'id': '6320fe8745636cb4dd677a5a',`
			`'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',`
			`'uploader': 'luminousaj',`
			`'uploader_id': '5fb81e51aa66ae000c395478',`
			`'ext': 'mp3',`
			`'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],`
			`}`
			`}, {`
			`'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',`
			`'info_dict': {`
			`'id': '5bc94ef4796001000498429f',`
			`'title': 'Yee',`
			`'uploader': '179617322678353920',`
			`'uploader_id': '5ba99cf71386730004552c42',`
			`'ext': 'mp3',`
			`'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']`
			`}`
			`}]`

			`_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"`
			`_GRAPHQL_QUERY = (`
			`'''query webBitePageGetBite($_id: MongoID!) {`
			`web {`
			`biteById(_id: $_id) {`
			`...bitePageFrag`
			`__typename`
			`}`
			`__typename`
			`}`
			`}`

			`fragment bitePageFrag on Bite {`
			`_id`
			`title`
			`userKeywords`
			`keywords`
			`color`
			`visibility`
			`isPremium`
			`owned`
			`price`
			`extraReview`
			`isAudioExists`
			`image {`
			`filename`
			`original {`
			`url`
			`__typename`
			`}`
			`__typename`
			`}`
			`userReactions {`
			`_id`
			`reactions`
			`createdAt`
			`__typename`
			`}`
			`topReactions`
			`totalSaveCount`
			`saved`
			`blerpLibraryType`
			`license`
			`licenseMetaData`
			`playCount`
			`totalShareCount`
			`totalFavoriteCount`
			`totalAddedToBoardCount`
			`userCategory`
			`userAudioQuality`
			`audioCreationState`
			`transcription`
			`userTranscription`
			`description`
			`createdAt`
			`updatedAt`
			`author`
			`listingType`
			`ownerObject {`
			`_id`
			`username`
			`profileImage {`
			`filename`
			`original {`
			`url`
			`__typename`
			`}`
			`__typename`
			`}`
			`__typename`
			`}`
			`transcription`
			`favorited`
			`visibility`
			`isCurated`
			`sourceUrl`
			`audienceRating`
			`strictAudienceRating`
			`ownerId`
			`reportObject {`
			`reportedContentStatus`
			`__typename`
			`}`
			`giphy {`
			`mp4`
			`gif`
			`__typename`
			`}`
			`audio {`
			`filename`
			`original {`
			`url`
			`__typename`
			`}`
			`mp3 {`
			`url`
			`__typename`
			`}`
			`__typename`
			`}`
			`__typename`
			`}`

			`''')`

			`def _real_extract(self, url):`
			`audio_id = self._match_id(url)`

			`data = {`
			`'operationName': self._GRAPHQL_OPERATIONNAME,`
			`'query': self._GRAPHQL_QUERY,`
			`'variables': {`
			`'_id': audio_id`
			`}`
			`}`

			`headers = {`
			`'Content-Type': 'application/json'`
			`}`

			`json_result = self._download_json('https://api.blerp.com/graphql',`
			`audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)`

			`bite_json = json_result['data']['web']['biteById']`

			`info_dict = {`
			`'id': bite_json['_id'],`
			`'url': bite_json['audio']['mp3']['url'],`
			`'title': bite_json['title'],`
			`'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),`
			`'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),`
			`'ext': 'mp3',`
			`'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)`
			`}`

			`return info_dict`