Source code for rude.core.file_finder

"""File discovery with gitignore support using fast os.scandir."""

from __future__ import annotations

import os
from collections.abc import Iterator, Sequence
from fnmatch import fnmatch
from pathlib import Path

# Directory names the scanner never descends into.  Entries containing ``*``
# are shell-style globs (matched with ``fnmatch``) rather than literal names.
ALWAYS_SKIP: frozenset[str] = frozenset(
    (
        # Version-control metadata
        ".git",
        ".hg",
        ".svn",
        # Virtual environments and third-party packages
        ".venv",
        "venv",
        ".env",
        "env",
        "node_modules",
        # Tool caches and scratch directories
        "__pycache__",
        ".tox",
        ".nox",
        ".mypy_cache",
        ".pytest_cache",
        ".ruff_cache",
        ".hypothesis",
        ".ipynb_checkpoints",
        # Build and packaging output
        "dist",
        "build",
        ".eggs",
        "*.egg-info",
        # Coverage artefacts
        ".coverage",
        "htmlcov",
    )
)


def find_python_files(root: Path | str, *, respect_gitignore: bool = True) -> Iterator[Path]:
    """Yield the Python files found beneath *root*.

    *root* is resolved to an absolute path before scanning.  When
    *respect_gitignore* is true, the ``.gitignore`` located directly in
    *root* (if one can be loaded) seeds the list of ignore rules handed to
    the directory scanner.
    """
    base = Path(root).resolve()
    root_rules = _load_gitignore(base) if respect_gitignore else None
    active: list[_GitIgnore] = [] if root_rules is None else [root_rules]
    yield from _scan_directory(base, base, active, respect_gitignore=respect_gitignore)


def _matches_any(gitignores: list[_GitIgnore], path: Path, *, is_dir: bool) -> bool:
    """Return True if any of *gitignores* ignores *path*.

    Each matcher is queried with *path* expressed relative to the directory
    the matcher was loaded from.  Directories are passed with a trailing
    ``/`` so that directory-only patterns can apply to them.
    """
    for gi in gitignores:
        try:
            # Patterns are split and matched on "/", so the relative path must
            # use forward slashes on every platform (str() would yield
            # backslashes on Windows).
            rel = path.relative_to(gi._root).as_posix()
        except ValueError:  # pragma: no cover
            continue
        if is_dir:
            rel += "/"
        if gi.match(rel):
            return True
    return False


def _scan_directory(
    directory: Path,
    root: Path,
    gitignores: list[_GitIgnore],
    *,
    respect_gitignore: bool,
) -> Iterator[Path]:
    """Yield ``.py`` files in *directory*, then recurse into its sub-directories.

    Files are yielded in name order before any sub-directory is visited;
    sub-directories are visited in name order.  Hidden directories, names in
    ``ALWAYS_SKIP`` and anything matched by an active ``.gitignore`` are
    skipped.  Symlinked directories are not descended into.  A directory
    that cannot be listed because of a ``PermissionError`` contributes
    nothing.
    """
    # Nested .gitignore: load the local one for this directory (root already loaded).
    if respect_gitignore and directory != root:
        local = _load_gitignore(directory)
        if local is not None:
            # Build a new list so sibling directories never see these rules.
            gitignores = [*gitignores, local]

    dirs: list[os.DirEntry[str]] = []
    files: list[os.DirEntry[str]] = []
    # Only the listing itself is guarded: keeping the recursion outside the
    # ``try`` means a failure in one sub-directory can never silently abort
    # the scan of its siblings.
    try:
        with os.scandir(directory) as entries:
            for entry in entries:
                (dirs if entry.is_dir(follow_symlinks=False) else files).append(entry)
    except PermissionError:
        # Best effort: an unreadable directory is skipped entirely.
        return

    for entry in sorted(files, key=lambda e: e.name):
        if not entry.name.endswith(".py"):
            continue
        path = Path(entry.path)
        if _matches_any(gitignores, path, is_dir=False):
            continue
        yield path

    for entry in sorted(dirs, key=lambda e: e.name):
        name = entry.name
        if name.startswith(".") or name in ALWAYS_SKIP:
            continue
        # ALWAYS_SKIP entries containing "*" are globs, not literal names.
        if any(fnmatch(name, p) for p in ALWAYS_SKIP if "*" in p):
            continue
        dir_path = Path(entry.path)
        if _matches_any(gitignores, dir_path, is_dir=True):
            continue
        yield from _scan_directory(
            dir_path, root, gitignores, respect_gitignore=respect_gitignore
        )


class _GitIgnore:
    """Simplified matcher for the patterns of a single ``.gitignore`` file.

    Blank lines and ``#`` comments are dropped.  A leading ``!`` marks a
    negated (re-include) pattern and a trailing ``/`` marks a pattern that
    applies to directories only.  Rules are evaluated in file order and the
    last rule that matches decides the outcome.
    """

    def __init__(self, patterns: list[str], root: Path) -> None:
        """Parse *patterns* (raw ``.gitignore`` lines) loaded from directory *root*."""
        self._root = root
        # Kept for backward compatibility with code that inspects them;
        # matching itself uses the order-preserving ``_rules`` list.
        self._positive: list[tuple[str, bool]] = []
        self._negated: list[tuple[str, bool]] = []
        # (pattern, dir_only, negated) in the order the lines appeared.
        self._rules: list[tuple[str, bool, bool]] = []
        for raw in patterns:
            p = raw.strip()
            if not p or p.startswith("#"):
                continue
            negated = p.startswith("!")
            if negated:
                p = p[1:]
            is_dir_only = p.endswith("/")
            if is_dir_only:
                p = p[:-1]
            (self._negated if negated else self._positive).append((p, is_dir_only))
            self._rules.append((p, is_dir_only, negated))

    def match(self, path: str) -> bool:
        """Return True if *path* is ignored by this file's rules.

        *path* is a ``/``-separated path relative to the ``.gitignore``
        directory; a trailing ``/`` signals that it names a directory.
        """
        is_dir = path.endswith("/")
        if is_dir:
            path = path[:-1]
        ignored = False
        for pattern, dir_only, negated in self._rules:
            if dir_only and not is_dir:
                continue
            # A rule that would leave the verdict unchanged need not be tested.
            if ignored != negated:
                continue
            if self._match_pattern(pattern, path):
                # Last matching rule wins: "!" re-includes, anything else ignores.
                ignored = not negated
        return ignored

    def _match_pattern(self, pattern: str, path: str) -> bool:
        """Return True if the single glob *pattern* matches *path*.

        A pattern containing ``/`` is compared against the whole relative
        path (a leading ``/`` is dropped first) and also at any depth via a
        ``**/`` prefix.  A pattern without ``/`` matches if any individual
        path component matches it.
        """
        if "/" in pattern:
            if pattern.startswith("/"):
                pattern = pattern[1:]
            return fnmatch(path, pattern) or fnmatch(path, f"**/{pattern}")
        return any(fnmatch(part, pattern) for part in path.split("/"))


def _load_gitignore(root: Path) -> _GitIgnore | None:
    """Load ``root/.gitignore``; return None if it is missing or unreadable.

    Bytes that are not valid UTF-8 are replaced rather than raising, so a
    stray byte cannot abort the scan and the remaining patterns still apply.
    """
    try:
        # Single EAFP read: a missing file, a directory named ".gitignore"
        # and permission problems all surface as OSError here.
        text = (root / ".gitignore").read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None
    return _GitIgnore(text.splitlines(), root)


def resolve_paths(paths: Sequence[str | Path]) -> Iterator[Path]:
    """Expand *paths* into Python files.

    Entries that do not exist are skipped.  A directory is expanded through
    ``find_python_files``; a regular file is yielded as given, but only when
    its suffix is ``.py``.
    """
    for raw in paths:
        candidate = Path(raw)
        if not candidate.exists():
            continue
        if candidate.is_dir():
            yield from find_python_files(candidate)
        elif candidate.is_file() and candidate.suffix == ".py":
            yield candidate


# Names exported by ``from <module> import *``.
__all__ = ["ALWAYS_SKIP", "find_python_files", "resolve_paths"]