From 4a249eaecae82f0e70ab13818a8747554b47d7cf Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 21 Jan 2025 19:35:22 +0000 Subject: [PATCH] GH-127381: pathlib ABCs: remove `JoinablePath.match()` Unlike `ReadablePath.[r]glob()` and `JoinablePath.full_match()`, the `JoinablePath.match()` method doesn't support the recursive wildcard `**`, and matches from the right when a fully relative pattern is given. These quirks mean it's probably unsuitable for inclusion in the pathlib ABCs, especially given `full_match()` handles the same use case. --- Lib/pathlib/_abc.py | 27 -------- Lib/pathlib/_local.py | 26 +++++++ Lib/test/test_pathlib/test_pathlib.py | 78 +++++++++++++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 82 ----------------------- 4 files changed, 104 insertions(+), 109 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index d55cc6f243cf2b..e498dc78e83b5e 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -358,33 +358,6 @@ def parents(self): parent = split(path)[0] return tuple(parents) - def match(self, path_pattern, *, case_sensitive=None): - """ - Return True if this path matches the given pattern. If the pattern is - relative, matching is done from the right; otherwise, the entire path - is matched. The recursive wildcard '**' is *not* supported by this - method.
- """ - if not isinstance(path_pattern, JoinablePath): - path_pattern = self.with_segments(path_pattern) - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self.parser) - sep = path_pattern.parser.sep - path_parts = self.parts[::-1] - pattern_parts = path_pattern.parts[::-1] - if not pattern_parts: - raise ValueError("empty pattern") - if len(path_parts) < len(pattern_parts): - return False - if len(path_parts) > len(pattern_parts) and path_pattern.anchor: - return False - globber = PathGlobber(sep, case_sensitive) - for path_part, pattern_part in zip(path_parts, pattern_parts): - match = globber.compile(pattern_part) - if match(path_part) is None: - return False - return True - def full_match(self, pattern, *, case_sensitive=None): """ Return True if this path matches the given glob-style pattern. The diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 2b42f3c22254b8..b3ec934f7510de 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -668,6 +668,32 @@ def full_match(self, pattern, *, case_sensitive=None): globber = _StringGlobber(self.parser.sep, case_sensitive, recursive=True) return globber.compile(pattern)(path) is not None + def match(self, path_pattern, *, case_sensitive=None): + """ + Return True if this path matches the given pattern. If the pattern is + relative, matching is done from the right; otherwise, the entire path + is matched. The recursive wildcard '**' is *not* supported by this + method. 
+ """ + if not isinstance(path_pattern, PurePath): + path_pattern = self.with_segments(path_pattern) + if case_sensitive is None: + case_sensitive = self.parser is posixpath + path_parts = self.parts[::-1] + pattern_parts = path_pattern.parts[::-1] + if not pattern_parts: + raise ValueError("empty pattern") + if len(path_parts) < len(pattern_parts): + return False + if len(path_parts) > len(pattern_parts) and path_pattern.anchor: + return False + globber = _StringGlobber(self.parser.sep, case_sensitive) + for path_part, pattern_part in zip(path_parts, pattern_parts): + match = globber.compile(pattern_part) + if match(path_part) is None: + return False + return True + # Subclassing os.PathLike makes isinstance() checks slower, # which in turn makes Path construction slower. Register instead! os.PathLike.register(PurePath) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 866a2d07dd692a..d64092b710a4d6 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -438,6 +438,84 @@ def test_match_empty(self): self.assertRaises(ValueError, P('a').match, '') self.assertRaises(ValueError, P('a').match, '.') + def test_match_common(self): + P = self.cls + # Simple relative pattern. + self.assertTrue(P('b.py').match('b.py')) + self.assertTrue(P('a/b.py').match('b.py')) + self.assertTrue(P('/a/b.py').match('b.py')) + self.assertFalse(P('a.py').match('b.py')) + self.assertFalse(P('b/py').match('b.py')) + self.assertFalse(P('/a.py').match('b.py')) + self.assertFalse(P('b.py/c').match('b.py')) + # Wildcard relative pattern. + self.assertTrue(P('b.py').match('*.py')) + self.assertTrue(P('a/b.py').match('*.py')) + self.assertTrue(P('/a/b.py').match('*.py')) + self.assertFalse(P('b.pyc').match('*.py')) + self.assertFalse(P('b./py').match('*.py')) + self.assertFalse(P('b.py/c').match('*.py')) + # Multi-part relative pattern. 
+ self.assertTrue(P('ab/c.py').match('a*/*.py')) + self.assertTrue(P('/d/ab/c.py').match('a*/*.py')) + self.assertFalse(P('a.py').match('a*/*.py')) + self.assertFalse(P('/dab/c.py').match('a*/*.py')) + self.assertFalse(P('ab/c.py/d').match('a*/*.py')) + # Absolute pattern. + self.assertTrue(P('/b.py').match('/*.py')) + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('a/b.py').match('/*.py')) + self.assertFalse(P('/a/b.py').match('/*.py')) + # Multi-part absolute pattern. + self.assertTrue(P('/a/b.py').match('/a/*.py')) + self.assertFalse(P('/ab.py').match('/a/*.py')) + self.assertFalse(P('/a/b/c.py').match('/a/*.py')) + # Multi-part glob-style pattern. + self.assertFalse(P('/a/b/c.py').match('/**/*.py')) + self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) + # Case-sensitive flag + self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) + # Matching against empty path + self.assertFalse(P('').match('*')) + self.assertFalse(P('').match('**')) + self.assertFalse(P('').match('**/*')) + + @needs_posix + def test_match_posix(self): + P = self.cls + self.assertFalse(P('A.py').match('a.PY')) + + @needs_windows + def test_match_windows(self): + P = self.cls + # Absolute patterns. + self.assertTrue(P('c:/b.py').match('*:/*.py')) + self.assertTrue(P('c:/b.py').match('c:/*.py')) + self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('b.py').match('c:*.py')) + self.assertFalse(P('b.py').match('c:/*.py')) + self.assertFalse(P('c:b.py').match('/*.py')) + self.assertFalse(P('c:b.py').match('c:/*.py')) + self.assertFalse(P('/b.py').match('c:*.py')) + self.assertFalse(P('/b.py').match('c:/*.py')) + # UNC patterns. 
+ self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) + self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) + # Case-insensitivity. + self.assertTrue(P('B.py').match('b.PY')) + self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) + self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) + # Path anchor doesn't match pattern anchor + self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' + self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' + self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' + @needs_posix def test_parse_path_posix(self): check = self._check_parse_path diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index d60bb147b72971..e67bead4297829 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -296,88 +296,6 @@ def test_str_windows(self): p = self.cls('//a/b/c/d') self.assertEqual(str(p), '\\\\a\\b\\c\\d') - def test_match_empty(self): - P = self.cls - self.assertRaises(ValueError, P('a').match, '') - - def test_match_common(self): - P = self.cls - # Simple relative pattern. - self.assertTrue(P('b.py').match('b.py')) - self.assertTrue(P('a/b.py').match('b.py')) - self.assertTrue(P('/a/b.py').match('b.py')) - self.assertFalse(P('a.py').match('b.py')) - self.assertFalse(P('b/py').match('b.py')) - self.assertFalse(P('/a.py').match('b.py')) - self.assertFalse(P('b.py/c').match('b.py')) - # Wildcard relative pattern. - self.assertTrue(P('b.py').match('*.py')) - self.assertTrue(P('a/b.py').match('*.py')) - self.assertTrue(P('/a/b.py').match('*.py')) - self.assertFalse(P('b.pyc').match('*.py')) - self.assertFalse(P('b./py').match('*.py')) - self.assertFalse(P('b.py/c').match('*.py')) - # Multi-part relative pattern. 
- self.assertTrue(P('ab/c.py').match('a*/*.py')) - self.assertTrue(P('/d/ab/c.py').match('a*/*.py')) - self.assertFalse(P('a.py').match('a*/*.py')) - self.assertFalse(P('/dab/c.py').match('a*/*.py')) - self.assertFalse(P('ab/c.py/d').match('a*/*.py')) - # Absolute pattern. - self.assertTrue(P('/b.py').match('/*.py')) - self.assertFalse(P('b.py').match('/*.py')) - self.assertFalse(P('a/b.py').match('/*.py')) - self.assertFalse(P('/a/b.py').match('/*.py')) - # Multi-part absolute pattern. - self.assertTrue(P('/a/b.py').match('/a/*.py')) - self.assertFalse(P('/ab.py').match('/a/*.py')) - self.assertFalse(P('/a/b/c.py').match('/a/*.py')) - # Multi-part glob-style pattern. - self.assertFalse(P('/a/b/c.py').match('/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) - # Case-sensitive flag - self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) - self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) - self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) - self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) - # Matching against empty path - self.assertFalse(P('').match('*')) - self.assertFalse(P('').match('**')) - self.assertFalse(P('').match('**/*')) - - @needs_posix - def test_match_posix(self): - P = self.cls - self.assertFalse(P('A.py').match('a.PY')) - - @needs_windows - def test_match_windows(self): - P = self.cls - # Absolute patterns. - self.assertTrue(P('c:/b.py').match('*:/*.py')) - self.assertTrue(P('c:/b.py').match('c:/*.py')) - self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive - self.assertFalse(P('b.py').match('/*.py')) - self.assertFalse(P('b.py').match('c:*.py')) - self.assertFalse(P('b.py').match('c:/*.py')) - self.assertFalse(P('c:b.py').match('/*.py')) - self.assertFalse(P('c:b.py').match('c:/*.py')) - self.assertFalse(P('/b.py').match('c:*.py')) - self.assertFalse(P('/b.py').match('c:/*.py')) - # UNC patterns. 
- self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) - self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) - # Case-insensitivity. - self.assertTrue(P('B.py').match('b.PY')) - self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) - self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) - # Path anchor doesn't match pattern anchor - self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' - self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' - self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' - def test_full_match_common(self): P = self.cls # Simple relative pattern.