Replace custom fnmatch/hidden logic with PathSpec (GitWildMatchPattern) to get true .gitignore semantics. Highlights: - Supports !, /, and ** patterns; defaults to ignoring dotfiles via .* (can be overridden with negation). - build_ignore_spec composes DEFAULT_EXCLUDED_* plus .zipignore/CLI patterns. - _collect_negation_prefixes prevents over-pruning by keeping dirs that might contain re-included files. - collect_files now matches with spec and prunes topdown only when no negated descendants are possible. - Drop fnmatch import and remove is_hidden, normalize_rel, should_exclude helpers. - On missing dependency, print install hint to stderr and exit(3) (pip install pathspec). BREAKING: introduces runtime dependency on pathspec.
286 lines
10 KiB
Python
Executable File
286 lines
10 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
zipdir.py - Create a ZIP archive from a folder while skipping unwanted files/folders.

Usage:
    python zipdir.py /path/to/source_dir out.zip
    python zipdir.py /path/to/source_dir out.zip --exclude "*.mp4" --exclude ".secret*"
    python zipdir.py /path/to/source_dir out.zip --zipignore .zipignore

Default skips include:
  - Hidden files & folders (anything with a path segment starting with ".")
  - node_modules, package-lock.json
  - Python env/cache/build: venv, .venv, env, build, dist, __pycache__, .pytest_cache, .mypy_cache, .ruff_cache, .tox, .nox
  - VCS/IDE/OS: .git, .hg, .svn, .idea, .vscode, .DS_Store, Thumbs.db
  - JS/TS build caches: .next, .nuxt, .svelte-kit, .angular, .parcel-cache, .turbo, .yarn, .pnpm-store, out, .output
  - General caches: .cache, .gradle, .terraform, .serverless, .vercel
  - Locks/reports/junk: yarn.lock, pnpm-lock.yaml, poetry.lock, Pipfile.lock, .coverage, coverage.xml, *.pyc, *.log, *.tmp, swap files, macOS resource forks

You can extend ignoring with --exclude patterns or a .zipignore file (one pattern per line, '#' comments).
Patterns use .gitignore syntax, so '/' anchoring, '**' and '!' negation (to re-include something) are supported.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
from pathlib import Path
|
|
import posixpath
|
|
import sys
|
|
import zipfile
|
|
from typing import Iterable, List, Set
|
|
|
|
# Pattern matching is delegated to pathspec so that patterns behave like the
# ones in a real .gitignore file.
try:
    from pathspec import PathSpec
    from pathspec.patterns.gitwildmatch import GitWildMatchPattern
except ImportError:
    # The dependency is mandatory: tell the user how to install it on stderr,
    # then stop with a dedicated exit status (3).
    install_hint = (
        "Error: This script now uses 'pathspec' for .gitignore-compatible matching.\n"
        "Install it with:\n python -m pip install pathspec"
    )
    print(install_hint, file=sys.stderr)
    sys.exit(3)
|
|
|
|
# --- Defaults ---
|
|
|
|
# Directory names that are skipped wherever they appear in the tree.
DEFAULT_EXCLUDED_DIR_NAMES: Set[str] = {
    # Version control, editors, operating system
    ".DS_Store",
    ".git",
    ".hg",
    ".idea",
    ".svn",
    ".vscode",
    # Python environments, caches and build output
    ".env",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".nox",
    ".pytest_cache",
    ".ruff_cache",
    ".tox",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    "env",
    "venv",
    # JavaScript / TypeScript tooling
    ".angular",
    ".next",
    ".nuxt",
    ".output",
    ".parcel-cache",
    ".pnpm-store",
    ".svelte-kit",
    ".turbo",
    ".yarn",
    "node_modules",
    "out",
    # Generic caches and deployment tooling
    ".cache",
    ".gradle",
    ".serverless",
    ".terraform",
    ".vercel",
}
|
|
|
|
# Exact file names that are skipped wherever they appear in the tree.
DEFAULT_EXCLUDED_FILE_NAMES: Set[str] = {
    # Dependency lock files
    "Pipfile.lock",
    "package-lock.json",
    "pnpm-lock.yaml",
    "poetry.lock",
    "yarn.lock",
    # Operating-system droppings (the last name really ends in a carriage return)
    ".DS_Store",
    "Thumbs.db",
    "desktop.ini",
    "Icon\r",
    # Coverage data and reports
    ".coverage",
    "coverage.xml",
}
|
|
|
|
# Wildcard patterns, matched against the POSIX-style path relative to the
# source root; they can hit files as well as directories.
DEFAULT_EXCLUDED_GLOBS: Set[str] = {
    # Compiled Python and native extension modules
    "*.pyc",
    "*.pyd",
    "*.pyo",
    "*.so",
    # Editor backups, swap files and scratch files
    "*~",
    "*.swo",
    "*.swp",
    "*.temp",
    "*.tmp",
    # Log output
    "*.log",
    # Environment files that tend to hold secrets (remove to ship them)
    "*.env",
    "*.env.*",
    ".env*",
    # Coverage output directories.
    # NOTE(review): with .gitignore rules a pattern containing an inner '/' is
    # anchored to the source root, so these presumably only match one level
    # deep (<top-level dir>/coverage/...) - confirm that is what is wanted.
    "*/.coverage/*",
    "*/coverage/*",
    # macOS resource-fork companions
    "._*",
}
|
|
|
|
def load_ignore_file(path: Path) -> List[str]:
    """Read ignore patterns from *path*.

    Each line is stripped of surrounding whitespace; blank lines and lines
    starting with '#' are dropped. A missing file is not an error and simply
    yields no patterns (the default '.zipignore' is optional).

    Args:
        path: Location of the ignore file.

    Returns:
        The patterns in file order.
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 and additionally drops a leading
        # byte-order mark. With plain 'utf-8' the BOM stays glued to the first
        # pattern (str.strip() does not remove U+FEFF), so that pattern would
        # silently never match anything.
        text = path.read_text(encoding="utf-8-sig")
    except FileNotFoundError:
        return []

    stripped = (raw.strip() for raw in text.splitlines())
    return [line for line in stripped if line and not line.startswith("#")]
|
|
|
|
def _collect_negation_prefixes(patterns: Iterable[str]) -> Set[str]:
    """
    Gather the directory prefixes named by negated ('!') patterns.

    Every returned prefix ends with '/'. For a negation such as
    '!logs/keep/important.log' the result contains 'logs/' and 'logs/keep/';
    a directory negation such as '!docs/api/' contributes 'docs/' and
    'docs/api/'. Negations without any directory part (for example
    '!keep.txt') contribute nothing. The caller uses these prefixes to decide
    which ignored directories must still be walked.
    """
    found: Set[str] = set()

    for entry in patterns:
        text = entry.strip()
        # Blank lines and '#' comments never start with '!', so one check
        # filters out everything that is not a negation.
        if not text.startswith("!"):
            continue

        # Drop the '!' marker and any leading root anchor.
        body = text[1:].lstrip("/")
        if not body:
            continue

        targets_dir = body.endswith("/")
        segments = [seg for seg in body.split("/") if seg]

        # A directory pattern counts as its own ancestor; a file pattern only
        # contributes the directories above it.
        ancestors = segments if targets_dir else segments[:-1]
        for depth in range(1, len(ancestors) + 1):
            found.add("/".join(ancestors[:depth]) + "/")

        # The directory pattern is also recorded exactly as written.
        if targets_dir:
            found.add(body)

    return found
|
|
|
|
|
|
def _escape_trailing_whitespace(name: str) -> str:
    """Return *name* as a pattern whose trailing whitespace is matched literally."""
    core = name.rstrip()
    # Each trailing whitespace character becomes a one-character bracket
    # expression, e.g. 'Icon\r' -> 'Icon[\r]', which matches exactly that
    # character and cannot be trimmed away when the pattern line is parsed.
    return core + "".join(f"[{ch}]" for ch in name[len(core):])


def build_ignore_spec(excludes: Iterable[str]) -> tuple[PathSpec, Set[str]]:
    """
    Build a PathSpec with .gitignore semantics from defaults + user patterns.

    Pattern order (later patterns win, so user negations can override defaults):
      1. '.*' - every hidden file/folder
      2. DEFAULT_EXCLUDED_DIR_NAMES, each as '<name>/'
      3. DEFAULT_EXCLUDED_FILE_NAMES
      4. DEFAULT_EXCLUDED_GLOBS
      5. *excludes* (CLI / .zipignore patterns, may use '/', '**' and '!')

    Args:
        excludes: Extra patterns supplied by the user, in priority order.

    Returns:
        (spec, negation_prefixes) where negation_prefixes is the result of
        _collect_negation_prefixes over the full pattern list.
    """
    # Patterns without '/' match in any directory, so '.*' hides dot-entries
    # at every depth.
    lines: List[str] = [".*"]

    # The defaults live in sets; sorting keeps the generated pattern list
    # identical from run to run.
    lines.extend(f"{name}/" for name in sorted(DEFAULT_EXCLUDED_DIR_NAMES))

    # NOTE(review): pathspec appears to strip surrounding whitespace from each
    # pattern line, which would turn the default 'Icon\r' into 'Icon' (hiding
    # any entry called 'Icon' and missing the real file) - confirm against the
    # installed version. The bracket form is correct either way.
    lines.extend(
        _escape_trailing_whitespace(name)
        for name in sorted(DEFAULT_EXCLUDED_FILE_NAMES)
    )

    lines.extend(sorted(DEFAULT_EXCLUDED_GLOBS))

    # User patterns go last so that they take precedence over the defaults.
    lines.extend(excludes)

    spec = PathSpec.from_lines(GitWildMatchPattern, lines)
    return spec, _collect_negation_prefixes(lines)
|
|
|
|
|
|
def collect_files(src_dir: Path, excludes: Iterable[str]) -> List[Path]:
    """
    Walk src_dir and return the files to include, honoring .gitignore-style
    patterns.

    A directory is pruned (not descended into) when the spec ignores it AND no
    negation prefix lies at or below it; otherwise it is still walked so that
    re-included files beneath it can be found. Symbolic links to directories
    are not followed.

    Args:
        src_dir: Root of the tree to scan; it is resolved before walking.
        excludes: Extra user patterns, passed to build_ignore_spec.

    Returns:
        Paths under the resolved src_dir. Within each directory, entries are
        visited in sorted name order, so the result is stable between runs.
    """
    src_dir = src_dir.resolve()
    spec, neg_prefixes = build_ignore_spec(excludes)
    selected: List[Path] = []

    for root, dirs, files in os.walk(src_dir, topdown=True, followlinks=False):
        root_path = Path(root)
        rel_root = "" if root_path == src_dir else root_path.relative_to(src_dir).as_posix()
        # Prefix that turns an entry name into its POSIX path relative to src_dir.
        prefix = f"{rel_root}/" if rel_root else ""

        kept_dirs: List[str] = []
        for name in sorted(dirs):
            # The trailing '/' makes directory-only patterns apply.
            d_rel = f"{prefix}{name}/"
            ignored = spec.match_file(d_rel)
            may_hold_reincluded = any(neg.startswith(d_rel) for neg in neg_prefixes)
            if ignored and not may_hold_reincluded:
                continue
            kept_dirs.append(name)
        # os.walk reads this list back (topdown=True): assigning in place both
        # prunes the walk and fixes the order in which subdirectories are visited.
        dirs[:] = kept_dirs

        for name in sorted(files):
            if not spec.match_file(prefix + name):
                selected.append(root_path / name)

    return selected
|
|
|
|
def next_available_path(path: Path) -> Path:
    """
    Return a path that does not exist yet, derived from `path`.

    The resolved `path` itself is returned when it is free. Otherwise a
    counter is inserted before the suffix and increased until an unused name
    is found: out.zip -> out-1.zip -> out-2.zip ...
    """
    original = path.resolve()
    candidate = original
    attempt = 0

    # Keep bumping the counter until the candidate name is unused.
    while candidate.exists():
        attempt += 1
        candidate = original.parent / f"{original.stem}-{attempt}{original.suffix}"

    return candidate
|
|
|
|
def make_zip(src_dir: Path, zip_path: Path, extra_excludes: Iterable[str] = ()) -> int:
    """
    Create zip_path from src_dir while skipping default and extra_excludes patterns.

    Args:
        src_dir: Directory whose contents are archived; entry names inside the
            archive are POSIX paths relative to it.
        zip_path: Archive to write. Missing parent directories are created and
            the file is opened in "w" mode.
        extra_excludes: Additional .gitignore-style patterns.

    Returns:
        The number of files added.
    """
    src_dir = src_dir.resolve()
    zip_path = zip_path.resolve()

    # Keep the output archive out of itself by comparing exact paths. Adding
    # its relative path as an ignore pattern instead would be too broad: a
    # pattern without '/' (e.g. 'out.zip') matches at every depth and would
    # also drop same-named files in subdirectories, making the archive differ
    # from what a dry run lists.
    files = [p for p in collect_files(src_dir, extra_excludes) if p != zip_path]

    zip_path.parent.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
        for fp in files:
            zf.write(fp, fp.relative_to(src_dir).as_posix())

    return len(files)
|
|
|
|
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse the command-line arguments in *argv* (program name excluded).

    The returned namespace has: src and out (Path), exclude (list of pattern
    strings, empty by default), zipignore (Path or None) and list (bool).
    """
    parser = argparse.ArgumentParser(
        description="Zip a folder while skipping common junk/build/cache files.",
    )

    # Positional arguments
    parser.add_argument("src", type=Path, help="Source directory to zip")
    parser.add_argument("out", type=Path, help="Output .zip file path")

    # Optional arguments
    parser.add_argument(
        "--exclude",
        "-x",
        action="append",
        default=[],
        help="Extra glob pattern to exclude (can be used multiple times)",
    )
    parser.add_argument(
        "--zipignore",
        type=Path,
        default=None,
        help="Optional ignore file path (one glob per line). If omitted, '.zipignore' in the source dir is used when present.",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="Dry run: list files that would be included and exit",
    )

    return parser.parse_args(argv)
|
|
|
|
def main(argv: List[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Arguments without the program name; sys.argv[1:] when None.

    Returns:
        0 on success (including a --list dry run), 2 when the source
        directory does not exist or is not a directory.
    """
    ns = parse_args(sys.argv[1:] if argv is None else argv)

    if not ns.src.is_dir():
        print(f"Error: source directory not found: {ns.src}", file=sys.stderr)
        return 2

    # collect_files() and next_available_path() return resolved paths, so work
    # with resolved paths here as well. Mixing them with the raw arguments
    # made `--list` fail in relative_to() for a relative source directory and
    # made the "already exists" note fire for every relative output path.
    src: Path = ns.src.resolve()
    requested_out: Path = ns.out.resolve()

    # Load ignore patterns: CLI patterns first, then the ignore file.
    extra: List[str] = list(ns.exclude)
    if ns.zipignore is not None:
        ignore_file = ns.zipignore
        # The default '.zipignore' is optional, but a file the user named
        # explicitly should not be skipped without a word.
        if not ignore_file.exists():
            print(f"Warning: ignore file not found: {ignore_file}", file=sys.stderr)
    else:
        ignore_file = src / ".zipignore"
    extra.extend(load_ignore_file(ignore_file))

    # Choose a non-clobbering output path (appends -1, -2, ...)
    final_out = next_available_path(requested_out)

    if ns.list:
        files = collect_files(src, extra)
        print(f"Would create {final_out} with {len(files)} files:\n")
        for fp in files:
            print(fp.relative_to(src).as_posix())
        return 0

    count = make_zip(src, final_out, extra_excludes=extra)
    if final_out != requested_out:
        print(f"Note: '{ns.out}' already exists. Using '{final_out.name}'.")
    print(f"Created {final_out} with {count} files from {src}")
    return 0
|
|
|
|
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())