1
0
Fork 0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-06-10 05:49:37 +00:00

Model match-filter string comparison after format selection

This commit is contained in:
Max Teegen 2020-12-09 22:06:07 +01:00
parent 3b74d490e0
commit 9ec3eb2a3d
3 changed files with 34 additions and 26 deletions

View file

@ -237,24 +237,20 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
compare against a number, key = 'LITERAL' compare against a number, key = 'LITERAL'
(like "uploader = 'Mike Smith'", also works (like "uploader = 'Mike Smith'", also works
with !=) to match against a string literal with !=) to match against a string literal
and & to require multiple matches. Values and & to require multiple matches. The
which are not known are excluded unless you string comparisons ^= (starts with), *=
put a question mark (?) after the operator. (contains) and $= (ends with) are also supported. Every
For example, to only match videos that have comparison operator can be prefixed by ! to
been liked more than 100 times and disliked negate it. Values which are not known are
less than 50 times (or the dislike excluded unless you put a question mark (?)
functionality is not available at the given after the operator. For example, to only
service), but who also have a description, match videos that have been liked more than
use --match-filter "like_count > 100 & 100 times and disliked less than 50 times
(or the dislike functionality is not
available at the given service), but who
also have a description, use
--match-filter "like_count > 100 &
dislike_count <? 50 & description" . dislike_count <? 50 & description" .
For matching strings, the operators ~= and
!~= check for string containment and
exclusion. The operators *= and !*= search
for a regular expression.
For example, to only match videos which
have neither 'sponsored' nor 'Sponsored' in
the title, use --match-filter "title !*=
'[Ss]ponsored'"
--no-playlist Download only the video, if the URL refers --no-playlist Download only the video, if the URL refers
to a video and a playlist. to a video and a playlist.
--yes-playlist Download the playlist, if the URL refers to --yes-playlist Download the playlist, if the URL refers to

View file

@ -1178,7 +1178,6 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
'9999 51') '9999 51')
def test_match_str(self): def test_match_str(self):
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
self.assertFalse(match_str('xy', {'x': 1200})) self.assertFalse(match_str('xy', {'x': 1200}))
self.assertTrue(match_str('!xy', {'x': 1200})) self.assertTrue(match_str('!xy', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 1200})) self.assertTrue(match_str('x', {'x': 1200}))
@ -1195,6 +1194,17 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
self.assertFalse(match_str( self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description', 'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 90, 'description': 'foo'})) {'like_count': 90, 'description': 'foo'}))

View file

@ -4368,15 +4368,13 @@ def _match_one(filter_part, dct):
'>': operator.gt, '>': operator.gt,
'>=': operator.ge, '>=': operator.ge,
'=': operator.eq, '=': operator.eq,
'!=': operator.ne, '*=': operator.contains,
'~=': operator.contains, '^=': lambda attr, value: attr.startswith(value),
'!~=': lambda left, right: not operator.contains(left, right), '$=': lambda attr, value: attr.endswith(value),
'*=': lambda left, right: bool(re.search(right, left)),
'!*=': lambda left, right: not bool(re.search(right, left)),
} }
operator_rex = re.compile(r'''(?x)\s* operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+) (?P<key>[a-z_]+)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?: (?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
@ -4386,7 +4384,11 @@ def _match_one(filter_part, dct):
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
m = operator_rex.search(filter_part) m = operator_rex.search(filter_part)
if m: if m:
op = COMPARISON_OPERATORS[m.group('op')] unnegated_op = COMPARISON_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not unnegated_op(attr, value)
else:
op = unnegated_op
actual_value = dct.get(m.group('key')) actual_value = dct.get(m.group('key'))
if (m.group('quotedstrval') is not None if (m.group('quotedstrval') is not None
or m.group('strval') is not None or m.group('strval') is not None
@ -4401,7 +4403,7 @@ def _match_one(filter_part, dct):
if quote is not None: if quote is not None:
comparison_value = comparison_value.replace(r'\%s' % quote, quote) comparison_value = comparison_value.replace(r'\%s' % quote, quote)
else: else:
if m.group('op') in ('~=', '!~=', '*=', '!*='): if m.group('op') in ('*=', '^=', '$='):
raise ValueError( raise ValueError(
'Operator %s only supports string values!' % m.group('op')) 'Operator %s only supports string values!' % m.group('op'))
try: try: