#!/usr/bin/env python3
"""Check whether doc changes track code changes.

Collects the changed paths of a git repository (working tree, staged files,
a base ref, or a revision range), classifies each path with simple substring
and suffix heuristics, and prints warnings when one category changed without
another (e.g. code without docs). With ``--strict`` some warnings become
blockers and the process exits with status 1.
"""
import argparse
import subprocess
import sys
from pathlib import Path

# Substrings of the lowercased path that mark a design/documentation file.
DOC_PATTERNS = (
    "design/",
    "docs/",
    "adr",
    "architecture",
    "prd",
    "spec",
    "plan",
)
# File suffixes (lowercase) treated as source code.
CODE_SUFFIXES = {
    ".py",
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".java",
    ".go",
    ".rs",
    ".rb",
    ".php",
    ".kt",
    ".swift",
    ".scala",
    ".sh",
}
# Substrings of the lowercased path that mark code regardless of suffix.
CODE_HINTS = ("apps/", "packages/", "scripts/")
# Substrings of the lowercased path that mark a test file.
TEST_HINTS = ("test", "spec", "__tests__", "tests/")
# File suffixes (lowercase) treated as configuration.
CONFIG_SUFFIXES = {".yml", ".yaml", ".json", ".toml", ".ini", ".env"}
# Substrings of the lowercased path that mark config/infra files.
CONFIG_HINTS = (
    "docker",
    "compose",
    "k8s",
    "helm",
    "terraform",
    ".github/",
    ".githooks/",
    ".env",
)


def run_git(repo: Path, *args: str) -> list[str]:
    """Run ``git -C <repo> <args...>`` and return its non-blank output lines.

    Trailing whitespace is stripped from each line; leading whitespace is
    kept because ``git status --short`` encodes state in the first columns.

    Raises:
        RuntimeError: if git is not installed or exits with a non-zero status
            (the message is git's stderr when available).
    """
    try:
        result = subprocess.run(
            ["git", "-C", str(repo), *args],
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError as exc:
        # Surface a missing git binary through the same error type as any
        # other git failure so callers need only one handler.
        raise RuntimeError("git executable not found") from exc
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or "git command failed")
    return [line.rstrip() for line in result.stdout.splitlines() if line.strip()]


def classify(path_text: str) -> str:
    """Classify a changed path as tests, docs, code, config or other.

    Matching is case-insensitive for both the substring hints and the file
    suffix. Checks run in priority order: tests, docs, code, config.

    NOTE(review): "spec" appears in both TEST_HINTS and DOC_PATTERNS; because
    tests are checked first, a path such as ``docs/spec.md`` is reported as
    "tests". Confirm whether that precedence is intended.
    """
    lower = path_text.lower()
    # Lowercase the suffix too so README.MD / main.PY are not missed.
    suffix = Path(path_text).suffix.lower()

    if any(token in lower for token in TEST_HINTS):
        return "tests"
    if suffix == ".md" or any(token in lower for token in DOC_PATTERNS):
        return "docs"
    if suffix in CODE_SUFFIXES or any(token in lower for token in CODE_HINTS):
        return "code"
    if suffix in CONFIG_SUFFIXES or any(token in lower for token in CONFIG_HINTS):
        return "config"
    return "other"


def print_group(title: str, items: list[str]) -> None:
    """Print a titled bullet list of paths, or a "none" bullet when empty."""
    print(f"\n{title}:")
    if not items:
        print(" - none")
        return
    for item in items:
        print(f" - {item}")


def assess_changes(
    docs: list[str],
    code: list[str],
    tests: list[str],
    config: list[str],
    other: list[str],
    strict: bool,
) -> dict:
    """Derive warnings and blockers from the per-category changed paths.

    Returns:
        A dict with ``warnings`` (list of messages), ``blockers`` (the subset
        of messages that block in strict mode) and ``blocking`` (True when
        any blocker exists). Only "code without docs" and "config without
        docs" can become blockers, and only when ``strict`` is set.
    """
    warnings: list[str] = []
    blockers: list[str] = []

    if code and not docs:
        message = "Code changed but no design/doc files changed."
        warnings.append(message)
        if strict:
            blockers.append(message)

    if config and not docs:
        message = "Config or deployment files changed without any doc updates."
        warnings.append(message)
        if strict:
            blockers.append(message)

    if docs and not code and not config and not tests:
        warnings.append(
            "Docs changed without code changes. This may be intentional, but verify they still match the repository."
        )

    if code and not tests:
        warnings.append(
            "Code changed without any test-file changes. Verify whether tests should change."
        )

    if other:
        warnings.append(
            "Unclassified files changed. Confirm they do not affect documented behavior or runtime assumptions."
        )

    return {
        "warnings": warnings,
        "blockers": blockers,
        "blocking": bool(blockers),
    }


def extract_status_paths(lines: list[str]) -> list[str]:
    """Extract the sorted, de-duplicated paths from ``git status --short`` lines.

    Each line is a two-character status, a space, then the path. Rename and
    copy entries are printed as ``old -> new``; only the destination path is
    kept so it can be classified like any other changed file.
    """
    paths: set[str] = set()
    for line in lines:
        if len(line) <= 3:
            continue
        status, path = line[:2], line[3:]
        if ("R" in status or "C" in status) and " -> " in path:
            # Keep the destination of the rename/copy, not "old -> new".
            path = path.rsplit(" -> ", 1)[-1]
        paths.add(path)
    return sorted(paths)


def collect_paths(repo: Path, args: argparse.Namespace) -> list[str]:
    """Return the changed paths for the mode selected on the command line.

    Precedence: ``--staged``, then ``--base-ref``, then ``--rev-range``;
    with none of them the working-tree status is used. All diff-based modes
    restrict results to added/copied/modified/renamed files.

    Raises:
        RuntimeError: propagated from ``run_git`` when git fails.
    """
    if args.staged:
        return run_git(repo, "diff", "--cached", "--name-only", "--diff-filter=ACMR")
    if args.base_ref:
        return run_git(
            repo,
            "diff",
            "--name-only",
            "--diff-filter=ACMR",
            f"{args.base_ref}...HEAD",
        )
    if args.rev_range:
        if ".." in args.rev_range:
            return run_git(
                repo, "diff", "--name-only", "--diff-filter=ACMR", args.rev_range
            )
        # Single commit: --root makes a parentless (initial) commit list its
        # files, and the diff filter matches the other diff-based modes.
        return run_git(
            repo,
            "diff-tree",
            "--root",
            "--no-commit-id",
            "--name-only",
            "--diff-filter=ACMR",
            "-r",
            args.rev_range,
        )
    changed = run_git(repo, "status", "--short")
    return extract_status_paths(changed)


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Check whether doc changes track code changes."
    )
    parser.add_argument("repo", nargs="?", default=".", help="git repository path")
    parser.add_argument("--strict", action="store_true", help="fail on blocking drift")
    parser.add_argument("--staged", action="store_true", help="inspect staged files only")
    parser.add_argument("--base-ref", help="compare changes from base ref to HEAD")
    parser.add_argument(
        "--rev-range", help="inspect a git revision range or a single commit"
    )
    return parser.parse_args()


def main() -> int:
    """Run the drift check and return the process exit status.

    Returns:
        0 when nothing blocks, 1 when strict mode found blocking drift, and
        2 when the path is not a git repository or a git command failed.
    """
    args = parse_args()
    repo = Path(args.repo).expanduser().resolve()
    if not (repo / ".git").exists():
        print(f"Not a git repository: {repo}")
        return 2

    try:
        paths = sorted(set(collect_paths(repo, args)))
    except RuntimeError as exc:
        # Report git failures (e.g. an unknown ref) without a traceback.
        print(f"git error: {exc}", file=sys.stderr)
        return 2

    # Classify each path once and bucket it; sorted input keeps buckets sorted.
    groups: dict[str, list[str]] = {
        "docs": [],
        "code": [],
        "tests": [],
        "config": [],
        "other": [],
    }
    for changed_path in paths:
        groups[classify(changed_path)].append(changed_path)
    docs = groups["docs"]
    code = groups["code"]
    tests = groups["tests"]
    config = groups["config"]
    other = groups["other"]

    assessment = assess_changes(docs, code, tests, config, other, args.strict)

    print(f"Repository: {repo}")
    print(f"Changed files: {len(paths)}")
    print_group("Design and doc files", docs)
    print_group("Code files", code)
    print_group("Test files", tests)
    print_group("Config and infra files", config)
    print_group("Other files", other)

    print("\nAssessment:")
    if not assessment["warnings"]:
        print(" - No obvious doc/code drift detected from changed-file classification.")
    else:
        for warning in assessment["warnings"]:
            print(f" - {warning}")

    print("\nNext actions:")
    if code and not docs:
        print(" - Review design/ or docs/ and update affected architecture, workflow, or API notes.")
    if docs:
        print(" - Confirm each changed doc still matches the actual implementation.")
    if code:
        print(" - Confirm changed code paths match documented workflow, schema, and runtime assumptions.")
    if other:
        print(" - Review unclassified paths and decide whether docs or tests should be updated.")

    if assessment["blocking"]:
        print("\nResult: blocking drift detected.")
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())