commit 5dc49f6b69c95793447c00c75ede5d0a4632deae Author: Victor Giers Date: Sat Aug 23 15:03:29 2025 +0200 initial commit diff --git a/zipdir.py b/zipdir.py new file mode 100755 index 0000000..0c2ec1d --- /dev/null +++ b/zipdir.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +""" +zipdir.py - Create a ZIP archive from a folder while skipping unwanted files/folders. + +Usage: + python zipdir.py /path/to/source_dir out.zip + python zipdir.py /path/to/source_dir out.zip --exclude "*.mp4" --exclude ".secret*" + python zipdir.py /path/to/source_dir out.zip --zipignore .zipignore + +Default skips include: +- Hidden files & folders (anything with a path segment starting with ".") +- node_modules, package-lock.json +- Python env/cache: venv, .venv, env, __pycache__, .pytest_cache, .mypy_cache, .ruff_cache, .tox, .nox +- VCS/IDE/OS: .git, .hg, .svn, .idea, .vscode, .DS_Store, Thumbs.db +- JS/TS build caches: .next, .nuxt, .svelte-kit, .angular, .parcel-cache, .turbo, .yarn, .pnpm-store, out, .output +- General caches: .cache, .gradle, .terraform, .serverless, .vercel +- Locks/reports/junk: yarn.lock, pnpm-lock.yaml, poetry.lock, Pipfile.lock, .coverage, coverage.xml, *.pyc, *.log, *.tmp, swap files, macOS resource forks + +You can extend ignoring with --exclude globs or a .zipignore file (one glob per line, '#' comments). +""" + +from __future__ import annotations + +import argparse +import fnmatch +import os +from pathlib import Path +import posixpath +import sys +import zipfile +from typing import Iterable, List, Set + +# --- Defaults --- + +DEFAULT_EXCLUDED_DIR_NAMES: Set[str] = { + # VCS / IDE / OS + ".git", ".hg", ".svn", ".idea", ".vscode", ".DS_Store", + # Python + "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", ".ipynb_checkpoints", + ".tox", ".nox", "build", "dist", ".venv", "venv", "env", ".env", + # JS/TS + "node_modules", ".next", ".nuxt", ".svelte-kit", ".angular", ".parcel-cache", + ".turbo", ".yarn", ".pnpm-store", "out", ".output", + # General caches + ".cache", ".gradle", ".terraform", ".serverless", ".vercel", +} + +DEFAULT_EXCLUDED_FILE_NAMES: Set[str] = { + # Locks / metadata + "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "poetry.lock", "Pipfile.lock", + # OS junk + ".DS_Store", "Thumbs.db", "desktop.ini", "Icon\r", + # Coverage / reports + ".coverage", "coverage.xml", +} + +# Globs apply to files OR directories (match against the *relative* posix path from src root) +DEFAULT_EXCLUDED_GLOBS: Set[str] = { + # Python bytecode / extensions + "*.pyc", "*.pyd", "*.pyo", "*.so", + # Editors / temp + "*~", "*.swp", "*.swo", "*.tmp", "*.temp", + # Logs + "*.log", + # Env files / secrets (comment out if you want them) + ".env*", "*.env", "*.env.*", + # Common build outputs (language-agnostic) + "*/coverage/*", "*/.coverage/*", + # macOS resource forks + "._*", +} + +def load_ignore_file(path: Path) -> List[str]: + """Read ignore patterns from a file (one glob per line, '#' for comments).""" + patterns: List[str] = [] + try: + for raw in path.read_text(encoding="utf-8").splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + patterns.append(line) + except FileNotFoundError: + pass + return patterns + +def is_hidden(path: Path) -> bool: + """Return True if any component of path starts with a dot ('.').""" + for part in path.parts: + if part.startswith("."): + return True + return False + +def normalize_rel(path: Path, root: Path) -> str: + """Return POSIX-style relative path from root to path.""" + rel = path.relative_to(root) + return rel.as_posix() + +def should_exclude(rel_posix: str, name: str, is_dir: bool, *, name_dir_set: Set[str], name_file_set: Set[str], glob_set: Iterable[str]) -> bool: + """Decide if an item should be excluded based on name sets, hidden-ness, and glob rules.""" + # Hidden files/folders + if any(seg.startswith(".") for seg in rel_posix.split("/")): + return True + + # Direct name checks + if is_dir and name in name_dir_set: + return True + if not is_dir and name in name_file_set: + return True + + # Glob checks against the relative posix path + for pat in glob_set: + if fnmatch.fnmatchcase(rel_posix, pat): + return True + + return False + +def collect_files(src_dir: Path, excludes: Iterable[str]) -> List[Path]: + """Traverse src_dir and return a list of file Paths that should be included.""" + src_dir = src_dir.resolve() + include_files: List[Path] = [] + + # Compile combined glob set + combined_globs: Set[str] = set(DEFAULT_EXCLUDED_GLOBS) + combined_globs.update(excludes) + + # Walk and prune + for root, dirs, files in os.walk(src_dir, topdown=True, followlinks=False): + root_path = Path(root) + rel_root = root_path.relative_to(src_dir).as_posix() if root_path != src_dir else "" + + # Prune directories in-place + pruned: List[str] = [] + for d in list(dirs): # iterate over a copy since we'll modify dirs + d_rel = posixpath.join(rel_root, d) if rel_root else d + if should_exclude(d_rel, d, True, + name_dir_set=DEFAULT_EXCLUDED_DIR_NAMES, + name_file_set=DEFAULT_EXCLUDED_FILE_NAMES, + glob_set=combined_globs): + pruned.append(d) + if pruned: + dirs[:] = [d for d in dirs if d not in pruned] + + # Files + for f in files: + f_rel = posixpath.join(rel_root, f) if rel_root else f + if should_exclude(f_rel, f, False, + name_dir_set=DEFAULT_EXCLUDED_DIR_NAMES, + name_file_set=DEFAULT_EXCLUDED_FILE_NAMES, + glob_set=combined_globs): + continue + include_files.append(root_path / f) + + return include_files + +def make_zip(src_dir: Path, zip_path: Path, extra_excludes: Iterable[str] = ()) -> int: + """ + Create zip_path from src_dir while skipping default and extra_excludes patterns. + Returns the number of files added. + """ + src_dir = src_dir.resolve() + zip_path = zip_path.resolve() + + # If output zip is inside source tree, exclude it explicitly + extra = list(extra_excludes) + try: + zip_rel = zip_path.relative_to(src_dir).as_posix() + extra.append(zip_rel) + except ValueError: + pass # not inside src + + files = collect_files(src_dir, extra) + files = [p for p in files if p != zip_path] + + zip_path.parent.mkdir(parents=True, exist_ok=True) + + with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6) as zf: + for fp in files: + arcname = fp.relative_to(src_dir).as_posix() + zf.write(fp, arcname) + return len(files) + +def parse_args(argv: List[str]) -> argparse.Namespace: + p = argparse.ArgumentParser(description="Zip a folder while skipping common junk/build/cache files.") + p.add_argument("src", type=Path, help="Source directory to zip") + p.add_argument("out", type=Path, help="Output .zip file path") + p.add_argument("--exclude", "-x", action="append", default=[], help="Extra glob pattern to exclude (can be used multiple times)") + p.add_argument("--zipignore", type=Path, default=None, help="Optional ignore file path (one glob per line). If omitted, '.zipignore' in the source dir is used when present.") + p.add_argument("--list", action="store_true", help="Dry run: list files that would be included and exit") + return p.parse_args(argv) + +def main(argv: List[str] | None = None) -> int: + ns = parse_args(sys.argv[1:] if argv is None else argv) + src: Path = ns.src + out: Path = ns.out + + if not src.exists() or not src.is_dir(): + print(f"Error: source directory not found: {src}", file=sys.stderr) + return 2 + + # Load ignore patterns + extra: List[str] = list(ns.exclude) + ignore_file = ns.zipignore if ns.zipignore is not None else (src / ".zipignore") + extra.extend(load_ignore_file(ignore_file)) + + files = collect_files(src, extra) + + if ns.list: + print(f"Would include {len(files)} files:\n") + for fp in files: + print(fp.relative_to(src).as_posix()) + return 0 + + count = make_zip(src, out, extra_excludes=extra) + print(f"Created {out} with {count} files from {src}") + return 0 + +if __name__ == "__main__": + raise SystemExit(main()) \ No newline at end of file