From 3b74d490e072b58e59b16561d4736601f1c62448 Mon Sep 17 00:00:00 2001 From: Max Teegen <870074+max-te@users.noreply.github.com> Date: Sun, 13 Jun 2021 16:25:19 +0200 Subject: [PATCH 1/3] More string operators for --match-filter --- README.md | 115 ++++++++++++++++++++++++++++++++++++-------- youtube_dl/utils.py | 10 ++-- 2 files changed, 102 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 2841ed68f..cfa9e9fb3 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo CIDR notation ## Video Selection: +<<<<<<< HEAD --playlist-start NUMBER Playlist video to start at (default is 1) --playlist-end NUMBER Playlist video to end at (default is @@ -160,27 +161,33 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo than COUNT views --max-views COUNT Do not download any videos with more than COUNT views - --match-filter FILTER Generic video filter. Specify any key - (see the "OUTPUT TEMPLATE" for a list - of available keys) to match if the key - is present, !key to check if the key is - not present, key > NUMBER (like - "comment_count > 12", also works with - >=, <, <=, !=, =) to compare against a - number, key = 'LITERAL' (like "uploader - = 'Mike Smith'", also works with !=) to - match against a string literal and & to - require multiple matches. Values which - are not known are excluded unless you - put a question mark (?) after the - operator. For example, to only match - videos that have been liked more than - 100 times and disliked less than 50 - times (or the dislike functionality is - not available at the given service), - but who also have a description, use - --match-filter "like_count > 100 & + --match-filter FILTER Generic video filter. 
Specify any key (see + the "OUTPUT TEMPLATE" for a list of + available keys) to match if the key is + present, !key to check if the key is not + present, key > NUMBER (like "comment_count + > 12", also works with >=, <, <=, !=, =) to + compare against a number, key = 'LITERAL' + (like "uploader = 'Mike Smith'", also works + with !=) to match against a string literal + and & to require multiple matches. Values + which are not known are excluded unless you + put a question mark (?) after the operator. + For example, to only match videos that have + been liked more than 100 times and disliked + less than 50 times (or the dislike + functionality is not available at the given + service), but who also have a description, + use --match-filter "like_count > 100 & dislike_count NUMBER (like "comment_count + > 12", also works with >=, <, <=, !=, =) to + compare against a number, key = 'LITERAL' + (like "uploader = 'Mike Smith'", also works + with !=) to match against a string literal + and & to require multiple matches. Values + which are not known are excluded unless you + put a question mark (?) after the operator. 
+ For example, to only match videos that have + been liked more than 100 times and disliked + less than 50 times (or the dislike + functionality is not available at the given + service), but who also have a description, + use --match-filter "like_count > 100 & + dislike_count >>>>>> dd954e809 (More string operators for --match-filter) ## Download Options: -r, --limit-rate RATE Maximum download rate in bytes per diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e722eed58..a774b0c9d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4369,6 +4369,10 @@ def _match_one(filter_part, dct): '>=': operator.ge, '=': operator.eq, '!=': operator.ne, + '~=': operator.contains, + '!~=': lambda left, right: not operator.contains(left, right), + '*=': lambda left, right: bool(re.search(right, left)), + '!*=': lambda left, right: not bool(re.search(right, left)), } operator_rex = re.compile(r'''(?x)\s* (?P[a-z_]+) @@ -4392,14 +4396,14 @@ def _match_one(filter_part, dct): # https://github.com/ytdl-org/youtube-dl/issues/11082). or actual_value is not None and m.group('intval') is not None and isinstance(actual_value, compat_str)): - if m.group('op') not in ('=', '!='): - raise ValueError( - 'Operator %s does not support string values!' % m.group('op')) comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') quote = m.group('quote') if quote is not None: comparison_value = comparison_value.replace(r'\%s' % quote, quote) else: + if m.group('op') in ('~=', '!~=', '*=', '!*='): + raise ValueError( + 'Operator %s only supports string values!' 
% m.group('op')) try: comparison_value = int(m.group('intval')) except ValueError: From 9ec3eb2a3d1b53cc025e914241cda03edbacc7e2 Mon Sep 17 00:00:00 2001 From: Max Teegen <870074+max-te@users.noreply.github.com> Date: Wed, 9 Dec 2020 22:06:07 +0100 Subject: [PATCH 2/3] Model match-filter string comparison after format selection --- README.md | 30 +++++++++++++----------------- test/test_utils.py | 12 +++++++++++- youtube_dl/utils.py | 18 ++++++++++-------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index cfa9e9fb3..5f6ce7854 100644 --- a/README.md +++ b/README.md @@ -237,24 +237,20 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo compare against a number, key = 'LITERAL' (like "uploader = 'Mike Smith'", also works with !=) to match against a string literal - and & to require multiple matches. Values - which are not known are excluded unless you - put a question mark (?) after the operator. - For example, to only match videos that have - been liked more than 100 times and disliked - less than 50 times (or the dislike - functionality is not available at the given - service), but who also have a description, - use --match-filter "like_count > 100 & + and & to require multiple matches. The + string comparisons ^= (starts with), *= + (contains) and $= (ends with). Every + comparison operator can be prefixed by ! to + negate it. Values which are not known are + excluded unless you put a question mark (?) + after the operator. 
For example, to only + match videos that have been liked more than + 100 times and disliked less than 50 times + (or the dislike functionality is not + available at the given service), but who + also have a description, use + --match-filter "like_count > 100 & dislike_count foobar', {}) self.assertFalse(match_str('xy', {'x': 1200})) self.assertTrue(match_str('!xy', {'x': 1200})) self.assertTrue(match_str('x', {'x': 1200})) @@ -1195,6 +1194,17 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) + self.assertTrue(match_str('y^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y^=bar', {'y': 'foobar42'})) + self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'})) + self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42}) + self.assertTrue(match_str('y*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y$=42', {'y': 'foobar42'})) + self.assertFalse(match_str('y$=43', {'y': 'foobar42'})) self.assertFalse(match_str( 'like_count > 100 & dislike_count ': operator.gt, '>=': operator.ge, '=': operator.eq, - '!=': operator.ne, - '~=': operator.contains, - '!~=': lambda left, right: not operator.contains(left, right), - '*=': lambda left, right: bool(re.search(right, left)), - '!*=': lambda left, right: not bool(re.search(right, left)), + '*=': operator.contains, + '^=': lambda attr, value: attr.startswith(value), + '$=': lambda attr, value: attr.endswith(value), } operator_rex = re.compile(r'''(?x)\s* (?P[a-z_]+) - \s*(?P%s)(?P\s*\?)?\s* + \s*(?P!\s*)?(?P%s)(?P\s*\?)?\s* (?: (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| 
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| @@ -4386,7 +4384,11 @@ def _match_one(filter_part, dct): ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) m = operator_rex.search(filter_part) if m: - op = COMPARISON_OPERATORS[m.group('op')] + unnegated_op = COMPARISON_OPERATORS[m.group('op')] + if m.group('negation'): + op = lambda attr, value: not unnegated_op(attr, value) + else: + op = unnegated_op actual_value = dct.get(m.group('key')) if (m.group('quotedstrval') is not None or m.group('strval') is not None @@ -4401,7 +4403,7 @@ def _match_one(filter_part, dct): if quote is not None: comparison_value = comparison_value.replace(r'\%s' % quote, quote) else: - if m.group('op') in ('~=', '!~=', '*=', '!*='): + if m.group('op') in ('*=', '^=', '$='): raise ValueError( 'Operator %s only supports string values!' % m.group('op')) try: From c60284b6f2fd9a77adba07cd7e3ce1a7aae52ae2 Mon Sep 17 00:00:00 2001 From: Max Teegen <870074+max-te@users.noreply.github.com> Date: Sun, 13 Jun 2021 16:32:04 +0200 Subject: [PATCH 3/3] Remove accidental merge conflict markers. --- README.md | 65 ------------------------------------------------------- 1 file changed, 65 deletions(-) diff --git a/README.md b/README.md index 5f6ce7854..14089ef3b 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo CIDR notation ## Video Selection: -<<<<<<< HEAD --playlist-start NUMBER Playlist video to start at (default is 1) --playlist-end NUMBER Playlist video to end at (default is @@ -199,70 +198,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo downloaded videos in it. --include-ads Download advertisements as well (experimental) -======= - --playlist-start NUMBER Playlist video to start at (default is 1) - --playlist-end NUMBER Playlist video to end at (default is last) - --playlist-items ITEM_SPEC Playlist video items to download. 
Specify - indices of the videos in the playlist - separated by commas like: "--playlist-items - 1,2,5,8" if you want to download videos - indexed 1, 2, 5, 8 in the playlist. You can - specify range: "--playlist-items - 1-3,7,10-13", it will download the videos - at index 1, 2, 3, 7, 10, 11, 12 and 13. - --match-title REGEX Download only matching titles (regex or - caseless sub-string) - --reject-title REGEX Skip download for matching titles (regex or - caseless sub-string) - --max-downloads NUMBER Abort after downloading NUMBER files - --min-filesize SIZE Do not download any videos smaller than - SIZE (e.g. 50k or 44.6m) - --max-filesize SIZE Do not download any videos larger than SIZE - (e.g. 50k or 44.6m) - --date DATE Download only videos uploaded in this date - --datebefore DATE Download only videos uploaded on or before - this date (i.e. inclusive) - --dateafter DATE Download only videos uploaded on or after - this date (i.e. inclusive) - --min-views COUNT Do not download any videos with less than - COUNT views - --max-views COUNT Do not download any videos with more than - COUNT views - --match-filter FILTER Generic video filter. Specify any key (see - the "OUTPUT TEMPLATE" for a list of - available keys) to match if the key is - present, !key to check if the key is not - present, key > NUMBER (like "comment_count - > 12", also works with >=, <, <=, !=, =) to - compare against a number, key = 'LITERAL' - (like "uploader = 'Mike Smith'", also works - with !=) to match against a string literal - and & to require multiple matches. The - string comparisons ^= (starts with), *= - (contains) and $= (ends with). Every - comparison operator can be prefixed by ! to - negate it. Values which are not known are - excluded unless you put a question mark (?) - after the operator. 
For example, to only - match videos that have been liked more than - 100 times and disliked less than 50 times - (or the dislike functionality is not - available at the given service), but who - also have a description, use - --match-filter "like_count > 100 & - dislike_count >>>>>> dd954e809 (More string operators for --match-filter) ## Download Options: -r, --limit-rate RATE Maximum download rate in bytes per