EmboFlow/scripts/check_commit_message.py

127 lines
3.8 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import re
import subprocess
import sys
from pathlib import Path
# A gitmoji shortcode such as ":sparkles:" followed by at least one whitespace
# character.
SHORTCODE_PREFIX = re.compile(r"^:[a-z0-9_+-]+:\s+")

# Code points that may start a literal emoji prefix.  Besides the symbol /
# dingbat block (U+2600-U+27BF) and the main pictograph planes
# (U+1F300-U+1FAFF) this covers Miscellaneous Technical (U+2300-U+23FF, e.g.
# the "rewind" emoji U+23EA) and Miscellaneous Symbols and Arrows
# (U+2B00-U+2BFF, e.g. the up/down arrow emoji U+2B06/U+2B07).
_EMOJI_BASE = r"[\u2300-\u23FF\u2600-\u27BF\u2B00-\u2BFF\U0001F300-\U0001FAFF]"

# A literal emoji followed by at least one whitespace character.  Many emoji
# are stored as a base code point plus the variation selector U+FE0F, and some
# are zero-width-joiner (U+200D) sequences of several emoji, so both forms are
# accepted and consumed as part of the prefix instead of being mistaken for
# the start of the summary text.
EMOJI_PREFIX = re.compile(
    rf"^{_EMOJI_BASE}\uFE0F?(?:\u200D{_EMOJI_BASE}\uFE0F?)*\s+"
)
def strip_prefix(message: str) -> str:
    """Return *message* with a leading gitmoji marker removed.

    A shortcode prefix is tried first, then a literal emoji prefix; the
    matched prefix together with the whitespace after it is dropped.  When
    neither pattern matches, *message* is returned unchanged.
    """
    for pattern in (SHORTCODE_PREFIX, EMOJI_PREFIX):
        found = pattern.match(message)
        if found:
            # Both patterns are anchored at the start, so cutting at the end
            # of the match removes exactly the prefix.
            return message[found.end():]
    return message
def validate_message(message: str) -> list[str]:
    """Validate a commit message and return a list of problems found.

    Blank lines and lines starting with ``#`` are ignored, and anything from
    git's scissors line onward is discarded.  The first remaining line is the
    subject.  The checks are:

    * the subject starts with a gitmoji shortcode or a literal emoji;
    * everything except that prefix is pure ASCII;
    * the subject has text after the prefix;
    * no character in U+4E00-U+9FFF (CJK ideographs) appears anywhere.

    Args:
        message: Raw commit message text, possibly including git's comment
            lines and the diff appended by ``git commit --verbose``.

    Returns:
        Human-readable error strings; an empty list means the message is
        valid.
    """
    # With `git commit --verbose` / `--cleanup=scissors` git places this
    # marker in the message file and puts the diff below it.  That diff is
    # not part of the message and its lines do not start with "#", so stop
    # reading here instead of validating source code as commit text.
    scissors = "# ------------------------ >8 ------------------------"

    cleaned_lines: list[str] = []
    for line in message.splitlines():
        if line == scissors:
            break
        # Lines holding only spaces/tabs count as blank so that a stray
        # whitespace line before the subject is not mistaken for the subject.
        if line.strip(" \t") and not line.startswith("#"):
            cleaned_lines.append(line)

    if not cleaned_lines:
        return ["Commit message must not be empty."]

    subject = cleaned_lines[0]
    summary = strip_prefix(subject)
    errors: list[str] = []

    if not SHORTCODE_PREFIX.match(subject) and not EMOJI_PREFIX.match(subject):
        errors.append("Commit subject must start with a gitmoji shortcode or emoji.")

    # Only the gitmoji prefix may be non-ASCII; the summary and the remaining
    # lines must be plain ASCII.
    normalized = "\n".join([summary, *cleaned_lines[1:]])
    if not normalized.isascii():
        errors.append("Commit message must be written in English ASCII text after the gitmoji prefix.")

    if not summary.strip():
        errors.append("Commit subject must include an English summary after the gitmoji prefix.")

    # Reported separately (on the full text, prefix included) so the author
    # gets a specific hint in addition to the generic ASCII error.
    if re.search(r"[\u4e00-\u9fff]", "\n".join(cleaned_lines)):
        errors.append("Commit message must not contain Chinese characters.")

    return errors
def read_message_file(path: Path) -> str:
    """Return the entire contents of the file at *path*, decoded as UTF-8."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()
def run_git(*args: str) -> list[str]:
    """Run ``git`` with *args* and return its non-blank output lines.

    Each returned line has surrounding whitespace removed; lines that are
    empty after stripping are dropped.

    Raises:
        RuntimeError: If git exits with a non-zero status.  The message is
            git's stripped stderr, or ``"git command failed"`` when stderr
            is empty.
    """
    command = ["git"]
    command.extend(args)
    completed = subprocess.run(command, capture_output=True, text=True, check=False)

    if completed.returncode != 0:
        detail = completed.stderr.strip()
        raise RuntimeError(detail if detail else "git command failed")

    stripped_lines = (raw.strip() for raw in completed.stdout.splitlines())
    return [entry for entry in stripped_lines if entry]
def commit_messages_from_range(rev_range: str) -> list[tuple[str, str]]:
    """Collect the commit messages named by *rev_range*.

    Args:
        rev_range: Either a revision range containing ``..`` (expanded with
            ``git rev-list``) or a single revision, which is used as given.

    Returns:
        ``(revision, message)`` pairs in the order the revisions were
        produced; each message is the full commit body (``%B``) with
        surrounding whitespace stripped.

    Raises:
        RuntimeError: If ``git rev-list`` or ``git log`` exits with a
            non-zero status.
    """
    shas = run_git("rev-list", rev_range) if ".." in rev_range else [rev_range]

    messages: list[tuple[str, str]] = []
    for sha in shas:
        # Ask git for UTF-8 and decode as UTF-8 explicitly.  Relying on the
        # locale's preferred encoding (what text=True does) mangles or fails
        # on emoji prefixes under non-UTF-8 locales, producing bogus
        # validation failures.  Undecodable bytes become U+FFFD so they are
        # reported as non-ASCII instead of crashing the run.
        result = subprocess.run(
            ["git", "log", "--encoding=UTF-8", "--format=%B", "-n", "1", sha],
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
        if result.returncode != 0:
            raise RuntimeError(result.stderr.strip() or "git log failed")
        messages.append((sha, result.stdout.strip()))
    return messages
def parse_args() -> argparse.Namespace:
    """Parse the process command line.

    Two optional string options are recognised, ``--file`` and
    ``--rev-range``; each is ``None`` in the returned namespace when absent.
    """
    cli = argparse.ArgumentParser(description="Validate commit message format.")
    option_help = (
        ("--file", "path to commit message file"),
        ("--rev-range", "git revision range or single commit"),
    )
    for flag, description in option_help:
        cli.add_argument(flag, help=description)
    return cli.parse_args()
def main() -> int:
    """Run the validator and return the process exit status.

    Returns:
        ``0`` when every checked message is valid, ``1`` when at least one
        validation error was found, and ``2`` when the command line does not
        supply exactly one of ``--file`` / ``--rev-range``.
    """
    args = parse_args()
    file_given = bool(args.file)
    range_given = bool(args.rev_range)

    # Both given or both missing: the invocation is ambiguous or empty.
    if file_given == range_given:
        print("Use exactly one of --file or --rev-range.")
        return 2

    if file_given:
        failures: list[str] = list(validate_message(read_message_file(Path(args.file))))
    else:
        # Prefix each error with the abbreviated revision it belongs to.
        failures = [
            f"{sha[:12]}: {error}"
            for sha, message in commit_messages_from_range(args.rev_range)
            for error in validate_message(message)
        ]

    if not failures:
        print("Commit message validation passed.")
        return 0

    print("Commit message validation failed:")
    for failure in failures:
        print(f" - {failure}")
    print("\nExpected format example:")
    print(" :sparkles: add hook templates and CI guardrails")
    return 1
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())