feat(zipdir)!: adopt .gitignore-compatible matching via pathspec; safer dir pruning
Replace custom fnmatch/hidden logic with PathSpec (GitWildMatchPattern) to get true .gitignore semantics. Highlights: - Supports !, /, and ** patterns; defaults to ignoring dotfiles via .* (can be overridden with negation). - build_ignore_spec composes DEFAULT_EXCLUDED_* plus .zipignore/CLI patterns. - _collect_negation_prefixes prevents over-pruning by keeping dirs that might contain re-included files. - collect_files now matches with spec and prunes topdown only when no negated descendants are possible. - Drop fnmatch import and remove is_hidden, normalize_rel, should_exclude helpers. - On missing dependency, print install hint to stderr and exit(3) (pip install pathspec). BREAKING: introduces runtime dependency on pathspec.
This commit is contained in:
137
zipdir.py
137
zipdir.py
@@ -22,7 +22,6 @@ You can extend ignoring with --exclude globs or a .zipignore file (one glob per
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import fnmatch
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import posixpath
|
import posixpath
|
||||||
@@ -30,6 +29,18 @@ import sys
|
|||||||
import zipfile
|
import zipfile
|
||||||
from typing import Iterable, List, Set
|
from typing import Iterable, List, Set
|
||||||
|
|
||||||
|
# Use real .gitignore semantics
|
||||||
|
try:
|
||||||
|
from pathspec import PathSpec
|
||||||
|
from pathspec.patterns.gitwildmatch import GitWildMatchPattern
|
||||||
|
except ImportError:
|
||||||
|
print(
|
||||||
|
"Error: This script now uses 'pathspec' for .gitignore-compatible matching.\n"
|
||||||
|
"Install it with:\n python -m pip install pathspec",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
raise SystemExit(3)
|
||||||
|
|
||||||
# --- Defaults ---
|
# --- Defaults ---
|
||||||
|
|
||||||
DEFAULT_EXCLUDED_DIR_NAMES: Set[str] = {
|
DEFAULT_EXCLUDED_DIR_NAMES: Set[str] = {
|
||||||
@@ -83,70 +94,104 @@ def load_ignore_file(path: Path) -> List[str]:
|
|||||||
pass
|
pass
|
||||||
return patterns
|
return patterns
|
||||||
|
|
||||||
def is_hidden(path: Path) -> bool:
|
def _collect_negation_prefixes(patterns: Iterable[str]) -> Set[str]:
|
||||||
"""Return True if any component of path starts with a dot ('.')."""
|
"""
|
||||||
for part in path.parts:
|
From a sequence of GitIgnore-style patterns, collect directory prefixes that
|
||||||
if part.startswith("."):
|
appear in negations (patterns starting with '!'). We use these to avoid
|
||||||
return True
|
pruning directories that might contain re-included files.
|
||||||
return False
|
"""
|
||||||
|
prefixes: Set[str] = set()
|
||||||
|
for raw in patterns:
|
||||||
|
line = raw.strip()
|
||||||
|
if not line or line.startswith("#"):
|
||||||
|
continue
|
||||||
|
if not line.startswith("!"):
|
||||||
|
continue
|
||||||
|
|
||||||
def normalize_rel(path: Path, root: Path) -> str:
|
pat = line[1:].lstrip("/") # drop '!' and leading root anchor
|
||||||
"""Return POSIX-style relative path from root to path."""
|
if not pat:
|
||||||
rel = path.relative_to(root)
|
continue
|
||||||
return rel.as_posix()
|
|
||||||
|
|
||||||
def should_exclude(rel_posix: str, name: str, is_dir: bool, *, name_dir_set: Set[str], name_file_set: Set[str], glob_set: Iterable[str]) -> bool:
|
# If the pattern ends with '/', it's a directory; otherwise grab parents.
|
||||||
"""Decide if an item should be excluded based on name sets, hidden-ness, and glob rules."""
|
is_dir_pat = pat.endswith("/")
|
||||||
# Hidden files/folders
|
path_no_slash = pat[:-1] if is_dir_pat else pat
|
||||||
if any(seg.startswith(".") for seg in rel_posix.split("/")):
|
parts = [p for p in path_no_slash.split("/") if p]
|
||||||
return True
|
|
||||||
|
|
||||||
# Direct name checks
|
# Add all parent directory prefixes ending with '/'
|
||||||
if is_dir and name in name_dir_set:
|
if parts:
|
||||||
return True
|
accum = ""
|
||||||
if not is_dir and name in name_file_set:
|
for i in range(len(parts) - (0 if is_dir_pat else 1)):
|
||||||
return True
|
accum = f"{accum}{parts[i]}/"
|
||||||
|
prefixes.add(accum)
|
||||||
|
|
||||||
# Glob checks against the relative posix path
|
# Also, if it explicitly targets a directory, include that directory
|
||||||
for pat in glob_set:
|
if is_dir_pat:
|
||||||
if fnmatch.fnmatchcase(rel_posix, pat):
|
prefixes.add(path_no_slash + "/")
|
||||||
return True
|
elif len(parts) > 1:
|
||||||
|
# file under a dir: add its parent dir
|
||||||
|
prefixes.add("/".join(parts[:-1]) + "/")
|
||||||
|
|
||||||
|
return prefixes
|
||||||
|
|
||||||
|
|
||||||
|
def build_ignore_spec(excludes: Iterable[str]) -> tuple[PathSpec, Set[str]]:
|
||||||
|
"""
|
||||||
|
Build a PathSpec with .gitignore semantics from defaults + user patterns.
|
||||||
|
Returns (spec, negation_prefixes).
|
||||||
|
"""
|
||||||
|
lines: List[str] = []
|
||||||
|
|
||||||
|
# Default "hidden everything" like your original behavior (can be overridden via !)
|
||||||
|
# In .gitignore semantics, patterns without '/' match in any directory.
|
||||||
|
lines.append(".*")
|
||||||
|
|
||||||
|
# Convert default directory names into dir patterns (match anywhere)
|
||||||
|
for d in DEFAULT_EXCLUDED_DIR_NAMES:
|
||||||
|
# 'd/' matches that directory at any depth
|
||||||
|
lines.append(f"{d}/")
|
||||||
|
|
||||||
|
# Default file names (match anywhere)
|
||||||
|
for f in DEFAULT_EXCLUDED_FILE_NAMES:
|
||||||
|
lines.append(f)
|
||||||
|
|
||||||
|
# Existing glob-style defaults (already POSIX). These work under gitwild too.
|
||||||
|
lines.extend(DEFAULT_EXCLUDED_GLOBS)
|
||||||
|
|
||||||
|
# User/CLI/.zipignore additions (support '/', '**', and '!' negations)
|
||||||
|
lines.extend(excludes)
|
||||||
|
|
||||||
|
spec = PathSpec.from_lines(GitWildMatchPattern, lines)
|
||||||
|
neg_prefixes = _collect_negation_prefixes(lines)
|
||||||
|
return spec, neg_prefixes
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def collect_files(src_dir: Path, excludes: Iterable[str]) -> List[Path]:
|
def collect_files(src_dir: Path, excludes: Iterable[str]) -> List[Path]:
|
||||||
"""Traverse src_dir and return a list of file Paths that should be included."""
|
"""
|
||||||
|
Traverse src_dir and return a list of file Paths to include, honoring
|
||||||
|
.gitignore-style patterns. We prune directories when the spec ignores them
|
||||||
|
AND no negation ('!') pattern could re-include something beneath.
|
||||||
|
"""
|
||||||
src_dir = src_dir.resolve()
|
src_dir = src_dir.resolve()
|
||||||
include_files: List[Path] = []
|
include_files: List[Path] = []
|
||||||
|
|
||||||
# Compile combined glob set
|
spec, neg_prefixes = build_ignore_spec(excludes)
|
||||||
combined_globs: Set[str] = set(DEFAULT_EXCLUDED_GLOBS)
|
|
||||||
combined_globs.update(excludes)
|
|
||||||
|
|
||||||
# Walk and prune
|
|
||||||
for root, dirs, files in os.walk(src_dir, topdown=True, followlinks=False):
|
for root, dirs, files in os.walk(src_dir, topdown=True, followlinks=False):
|
||||||
root_path = Path(root)
|
root_path = Path(root)
|
||||||
rel_root = root_path.relative_to(src_dir).as_posix() if root_path != src_dir else ""
|
rel_root = root_path.relative_to(src_dir).as_posix() if root_path != src_dir else ""
|
||||||
|
|
||||||
# Prune directories in-place
|
# Prune directories (but keep if a later '!' could re-include children)
|
||||||
pruned: List[str] = []
|
for d in list(dirs):
|
||||||
for d in list(dirs): # iterate over a copy since we'll modify dirs
|
d_rel = (posixpath.join(rel_root, d) if rel_root else d) + "/"
|
||||||
d_rel = posixpath.join(rel_root, d) if rel_root else d
|
if spec.match_file(d_rel):
|
||||||
if should_exclude(d_rel, d, True,
|
# If any negation prefix lies inside d_rel, don't prune
|
||||||
name_dir_set=DEFAULT_EXCLUDED_DIR_NAMES,
|
if not any(neg.startswith(d_rel) for neg in neg_prefixes):
|
||||||
name_file_set=DEFAULT_EXCLUDED_FILE_NAMES,
|
dirs.remove(d)
|
||||||
glob_set=combined_globs):
|
|
||||||
pruned.append(d)
|
|
||||||
if pruned:
|
|
||||||
dirs[:] = [d for d in dirs if d not in pruned]
|
|
||||||
|
|
||||||
# Files
|
# Files
|
||||||
for f in files:
|
for f in files:
|
||||||
f_rel = posixpath.join(rel_root, f) if rel_root else f
|
f_rel = posixpath.join(rel_root, f) if rel_root else f
|
||||||
if should_exclude(f_rel, f, False,
|
if spec.match_file(f_rel):
|
||||||
name_dir_set=DEFAULT_EXCLUDED_DIR_NAMES,
|
|
||||||
name_file_set=DEFAULT_EXCLUDED_FILE_NAMES,
|
|
||||||
glob_set=combined_globs):
|
|
||||||
continue
|
continue
|
||||||
include_files.append(root_path / f)
|
include_files.append(root_path / f)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user