EmboFlow/scripts/check_doc_code_sync.py

199 lines
6.1 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import subprocess
import sys
from pathlib import Path
# Substrings that mark a path as design/documentation material.
DOC_PATTERNS = ("design/", "docs/", "adr", "architecture", "prd", "spec", "plan")

# File extensions treated as source code.
CODE_SUFFIXES = {
    ".py", ".ts", ".tsx", ".js", ".jsx",
    ".java", ".go", ".rs", ".rb", ".php",
    ".kt", ".swift", ".scala", ".sh",
}

# Substrings that mark a path as source code regardless of its extension.
CODE_HINTS = ("apps/", "packages/", "scripts/")

# Substrings that mark a path as a test file.
TEST_HINTS = ("test", "spec", "__tests__", "tests/")

# File extensions and path substrings that mark configuration or
# deployment/infrastructure files.
CONFIG_SUFFIXES = {".yml", ".yaml", ".json", ".toml", ".ini", ".env"}
CONFIG_HINTS = (
    "docker", "compose", "k8s", "helm", "terraform",
    ".github/", ".githooks/", ".env",
)
def run_git(repo: Path, *args: str) -> list[str]:
    """Run ``git -C <repo> <args...>`` and return its non-blank output lines.

    Each returned line has trailing whitespace removed; leading whitespace is
    kept. Lines that are empty or whitespace-only are dropped.

    Raises:
        RuntimeError: if git exits with a non-zero status. The message is
            git's stderr output, or "git command failed" when stderr is empty.
    """
    command = ["git", "-C", str(repo)]
    command.extend(args)
    completed = subprocess.run(command, capture_output=True, text=True, check=False)
    if completed.returncode:
        raise RuntimeError(completed.stderr.strip() or "git command failed")

    lines = []
    for raw in completed.stdout.splitlines():
        if raw.strip():
            lines.append(raw.rstrip())
    return lines
def classify(path_text: str) -> str:
    """Classify a changed path as "docs", "tests", "code", "config" or "other".

    Matching is case-insensitive for both the path hints and the file
    extension. Rules are tried in this order and the first match wins:

    1. Markdown files and paths containing a directory-style doc pattern
       (a ``DOC_PATTERNS`` entry ending in "/") are "docs".
    2. Source files (extension in ``CODE_SUFFIXES``) whose path contains a
       ``TEST_HINTS`` token are "tests".
    3. Paths containing any remaining ``DOC_PATTERNS`` keyword are "docs".
    4. Paths containing a ``TEST_HINTS`` token are "tests".
    5. Paths containing a ``CODE_HINTS`` token, or with a source extension,
       are "code".
    6. Paths with a ``CONFIG_SUFFIXES`` extension or a ``CONFIG_HINTS`` token
       are "config".

    All hint checks are plain substring matches, so this is a heuristic.
    """
    lower = path_text.lower()
    # Compare the extension case-insensitively, like the path hints.
    suffix = Path(path_text).suffix.lower()

    def mentions(tokens) -> bool:
        return any(token in lower for token in tokens)

    if suffix == ".md" or mentions(t for t in DOC_PATTERNS if t.endswith("/")):
        return "docs"
    # Bare doc keywords such as "spec" and "plan" also occur in test file
    # names (foo.spec.ts, test_plan.py). For source files the test hint wins,
    # otherwise a changed test would be counted as a documentation update.
    if suffix in CODE_SUFFIXES and mentions(TEST_HINTS):
        return "tests"
    if mentions(DOC_PATTERNS):
        return "docs"
    if mentions(TEST_HINTS):
        return "tests"
    if mentions(CODE_HINTS) or suffix in CODE_SUFFIXES:
        return "code"
    if suffix in CONFIG_SUFFIXES or mentions(CONFIG_HINTS):
        return "config"
    return "other"
def print_group(title: str, items: list[str]) -> None:
    """Print a blank line, ``<title>:``, then one bullet line per item.

    When ``items`` is empty a single "none" bullet is printed instead.
    """
    print(f"\n{title}:")
    bullets = items if items else ["none"]
    for entry in bullets:
        print(f" - {entry}")
def assess_changes(
    docs: list[str],
    code: list[str],
    tests: list[str],
    config: list[str],
    other: list[str],
    strict: bool,
) -> dict:
    """Derive drift warnings from the classified lists of changed files.

    Returns a dict with three keys:
      - "warnings": every message raised, in a fixed order;
      - "blockers": the subset that blocks, populated only when ``strict``
        is true and only for code or config changes made without doc changes;
      - "blocking": True when "blockers" is non-empty.
    """
    warnings: list[str] = []
    blockers: list[str] = []

    def flag_drift(message: str) -> None:
        # Missing docs always warn; in strict mode they also block.
        warnings.append(message)
        if strict:
            blockers.append(message)

    docs_missing = not docs

    if code and docs_missing:
        flag_drift("Code changed but no design/doc files changed.")

    if config and docs_missing:
        flag_drift("Config or deployment files changed without any doc updates.")

    if docs and not (code or config or tests):
        warnings.append(
            "Docs changed without code changes. This may be intentional, but verify they still match the repository."
        )

    if code and not tests:
        warnings.append(
            "Code changed without any test-file changes. Verify whether tests should change."
        )

    if other:
        warnings.append(
            "Unclassified files changed. Confirm they do not affect documented behavior or runtime assumptions."
        )

    return {
        "warnings": warnings,
        "blockers": blockers,
        "blocking": len(blockers) > 0,
    }
def _status_entry_path(line: str) -> str:
    """Return the current path named by one ``git status --short`` line."""
    status, path = line[:2], line[3:]
    # Rename/copy entries read "old -> new"; only the new path exists now.
    if "R" in status or "C" in status:
        path = path.rpartition(" -> ")[2]
    # Git wraps paths containing whitespace or special characters in double
    # quotes; drop them so suffix checks see the real file name.
    # Backslash escapes inside the quotes are left as git printed them.
    if len(path) >= 2 and path.startswith('"') and path.endswith('"'):
        path = path[1:-1]
    return path


def extract_status_paths(lines: list[str]) -> list[str]:
    """Extract the sorted, de-duplicated paths from ``git status --short`` lines.

    Each line is a two-character status code, a space, then the path. Lines
    of three characters or fewer are ignored. For renamed or copied entries
    the destination path is returned, and surrounding double quotes are
    removed from quoted paths.
    """
    return sorted({_status_entry_path(line) for line in lines if len(line) > 3})
def collect_paths(repo: Path, args: argparse.Namespace) -> list[str]:
    """Return the changed paths for the inspection mode selected in ``args``.

    Modes, in priority order:
      - ``args.staged``: staged files (``git diff --cached``);
      - ``args.base_ref``: files changed in ``<base_ref>...HEAD``;
      - ``args.rev_range``: a ``git diff`` over the range when it contains
        "..", otherwise the files touched by that single commit
        (``git diff-tree``);
      - otherwise: everything reported by ``git status --short``.

    The ``git diff`` modes list only added, copied, modified and renamed
    files (``--diff-filter=ACMR``).
    """
    if args.staged:
        git_args = ["diff", "--cached", "--name-only", "--diff-filter=ACMR"]
    elif args.base_ref:
        git_args = ["diff", "--name-only", "--diff-filter=ACMR", f"{args.base_ref}...HEAD"]
    elif args.rev_range:
        if ".." in args.rev_range:
            git_args = ["diff", "--name-only", "--diff-filter=ACMR", args.rev_range]
        else:
            # NOTE(review): unlike the diff modes, no --diff-filter is passed
            # here - confirm whether deleted files are meant to be included.
            git_args = ["diff-tree", "--no-commit-id", "--name-only", "-r", args.rev_range]
    else:
        # No mode selected: fall back to the working-tree status.
        return extract_status_paths(run_git(repo, "status", "--short"))
    return run_git(repo, *git_args)
def parse_args() -> argparse.Namespace:
    """Parse the command line (``sys.argv``) into a namespace.

    The namespace carries ``repo`` (default "."), the boolean flags ``strict``
    and ``staged``, and the optional strings ``base_ref`` and ``rev_range``.
    """
    parser = argparse.ArgumentParser(description="Check whether doc changes track code changes.")
    parser.add_argument("repo", nargs="?", default=".", help="git repository path")

    switches = (
        ("--strict", "fail on blocking drift"),
        ("--staged", "inspect staged files only"),
    )
    for flag, text in switches:
        parser.add_argument(flag, action="store_true", help=text)

    valued = (
        ("--base-ref", "compare changes from base ref to HEAD"),
        ("--rev-range", "inspect a git revision range or a single commit"),
    )
    for flag, text in valued:
        parser.add_argument(flag, help=text)

    return parser.parse_args()
def main() -> int:
    """Run the doc/code drift check and print a report.

    Returns the process exit status:
      - 0: report printed, nothing blocking;
      - 1: blocking drift detected (only possible with ``--strict``);
      - 2: the path is not a git repository, or git could not list the
        changed files.
    """
    args = parse_args()
    repo = Path(args.repo).expanduser().resolve()
    if not (repo / ".git").exists():
        print(f"Not a git repository: {repo}")
        return 2

    # A git failure (e.g. an unknown --base-ref, or git not installed) must
    # not surface as a traceback: an uncaught exception exits with status 1,
    # which callers would read as "blocking drift detected".
    try:
        paths = sorted(set(collect_paths(repo, args)))
    except (RuntimeError, OSError) as error:
        print(f"Failed to list changed files: {error}")
        return 2

    # Classify each path once and bucket it by category.
    grouped: dict[str, list[str]] = {
        "docs": [],
        "code": [],
        "tests": [],
        "config": [],
        "other": [],
    }
    for path in paths:
        grouped[classify(path)].append(path)
    docs = grouped["docs"]
    code = grouped["code"]
    tests = grouped["tests"]
    config = grouped["config"]
    other = grouped["other"]

    assessment = assess_changes(docs, code, tests, config, other, args.strict)

    print(f"Repository: {repo}")
    print(f"Changed files: {len(paths)}")
    print_group("Design and doc files", docs)
    print_group("Code files", code)
    print_group("Test files", tests)
    print_group("Config and infra files", config)
    print_group("Other files", other)

    print("\nAssessment:")
    if not assessment["warnings"]:
        print(" - No obvious doc/code drift detected from changed-file classification.")
    else:
        for warning in assessment["warnings"]:
            print(f" - {warning}")

    print("\nNext actions:")
    if code and not docs:
        print(" - Review design/ or docs/ and update affected architecture, workflow, or API notes.")
    if docs:
        print(" - Confirm each changed doc still matches the actual implementation.")
    if code:
        print(" - Confirm changed code paths match documented workflow, schema, and runtime assumptions.")
    if other:
        print(" - Review unclassified paths and decide whether docs or tests should be updated.")

    if assessment["blocking"]:
        print("\nResult: blocking drift detected.")
        return 1
    return 0
if __name__ == "__main__":
raise SystemExit(main())