#!/usr/bin/env python3
"""Validate that commit messages start with a gitmoji and are ASCII English.

The message is taken either from a commit message file (``--file``) or from
the commits named by a git revision range or single revision
(``--rev-range``).
"""
import argparse
import re
import subprocess
import sys
from pathlib import Path

# A gitmoji shortcode such as ":sparkles:" followed by whitespace.
SHORTCODE_PREFIX = re.compile(r"^:[a-z0-9_+-]+:\s+")
# A single emoji code point, optionally followed by the emoji variation
# selector (U+FE0F), then whitespace. Many emoji are written with the
# selector (e.g. U+267B U+FE0F), so it must not block the match.
# NOTE(review): code points outside these two ranges (e.g. U+2B06) are still
# rejected -- confirm whether that is intended.
EMOJI_PREFIX = re.compile(r"^[\u2600-\u27BF\U0001F300-\U0001FAFF]\uFE0F?\s+")
# The "scissors" line git writes into the message template for
# `git commit --verbose` / `--cleanup=scissors`; git discards it and
# everything below it.
SCISSORS_LINE = re.compile(r"^# -{8,} >8 -{8,}\s*$")


def strip_prefix(message: str) -> str:
    """Return *message* without its leading gitmoji shortcode or emoji.

    The message is returned unchanged when it has no recognised prefix.
    """
    for pattern in (SHORTCODE_PREFIX, EMOJI_PREFIX):
        stripped, replaced = pattern.subn("", message, count=1)
        if replaced:
            return stripped
    return message


def _drop_scissors_section(lines: list[str]) -> list[str]:
    """Return *lines* truncated just before the first scissors line, if any."""
    for index, line in enumerate(lines):
        if SCISSORS_LINE.match(line):
            return lines[:index]
    return lines


def validate_message(message: str) -> list[str]:
    """Check one commit message and return a list of human-readable errors.

    Blank lines, lines starting with ``#`` and everything from a scissors
    line onwards are ignored. An empty list means the message is valid.
    """
    lines = _drop_scissors_section(message.splitlines())
    cleaned_lines = [line for line in lines if line and not line.startswith("#")]
    if not cleaned_lines:
        return ["Commit message must not be empty."]

    subject, *rest = cleaned_lines
    summary = strip_prefix(subject)
    errors: list[str] = []

    if not SHORTCODE_PREFIX.match(subject) and not EMOJI_PREFIX.match(subject):
        errors.append("Commit subject must start with a gitmoji shortcode or emoji.")

    # Only the prefix may be non-ASCII; the summary and body must encode cleanly.
    normalized = "\n".join([summary, *rest])
    try:
        normalized.encode("ascii")
    except UnicodeEncodeError:
        errors.append(
            "Commit message must be written in English ASCII text after the gitmoji prefix."
        )

    if not summary.strip():
        errors.append(
            "Commit subject must include an English summary after the gitmoji prefix."
        )

    # Checked against the full text, prefix included.
    body = "\n".join(cleaned_lines)
    if re.search(r"[\u4e00-\u9fff]", body):
        errors.append("Commit message must not contain Chinese characters.")

    return errors


def read_message_file(path: Path) -> str:
    """Return the UTF-8 decoded contents of the commit message file at *path*."""
    return path.read_text(encoding="utf-8")


def _git_stdout(*args: str, fallback: str = "git command failed") -> str:
    """Run ``git`` with *args* and return its raw standard output.

    Output is decoded as UTF-8 rather than the locale encoding, matching how
    the commit message file is read.

    Raises:
        RuntimeError: if git exits non-zero; the message is git's stderr, or
            *fallback* when stderr is empty.
    """
    result = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        encoding="utf-8",
        check=False,
    )
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or fallback)
    return result.stdout


def run_git(*args: str) -> list[str]:
    """Run ``git`` with *args* and return its non-blank output lines, stripped.

    Raises:
        RuntimeError: if git exits with a non-zero status.
    """
    output = _git_stdout(*args)
    return [line.strip() for line in output.splitlines() if line.strip()]


def commit_messages_from_range(rev_range: str) -> list[tuple[str, str]]:
    """Return ``(revision, message)`` pairs for the commits named by *rev_range*.

    A value containing ``..`` is expanded with ``git rev-list``; anything else
    is treated as a single revision. Each message is the commit's raw body
    (``%B``) with surrounding whitespace stripped.

    Raises:
        RuntimeError: if a git invocation exits with a non-zero status.
    """
    shas = run_git("rev-list", rev_range) if ".." in rev_range else [rev_range]
    return [
        (
            sha,
            _git_stdout(
                "log", "--format=%B", "-n", "1", sha, fallback="git log failed"
            ).strip(),
        )
        for sha in shas
    ]


def parse_args() -> argparse.Namespace:
    """Parse the command line (``--file`` and ``--rev-range``)."""
    parser = argparse.ArgumentParser(description="Validate commit message format.")
    parser.add_argument("--file", help="path to commit message file")
    parser.add_argument("--rev-range", help="git revision range or single commit")
    return parser.parse_args()


def main() -> int:
    """Validate the requested commit message(s) and return the exit status.

    Returns 0 when every message passes, 1 when any message fails validation,
    and 2 for a usage error or when the message(s) could not be read.
    """
    args = parse_args()
    if bool(args.file) == bool(args.rev_range):
        print("Use exactly one of --file or --rev-range.")
        return 2

    failures: list[str] = []
    try:
        if args.file:
            failures.extend(validate_message(read_message_file(Path(args.file))))
        else:
            for sha, message in commit_messages_from_range(args.rev_range):
                failures.extend(
                    f"{sha[:12]}: {error}" for error in validate_message(message)
                )
    except (OSError, UnicodeDecodeError, RuntimeError) as exc:
        # Missing file, missing git binary, bad revision or undecodable text:
        # report it plainly instead of crashing with a traceback.
        print(f"Unable to read commit message: {exc}", file=sys.stderr)
        return 2

    if failures:
        print("Commit message validation failed:")
        for failure in failures:
            print(f"  - {failure}")
        print("\nExpected format example:")
        print("  :sparkles: add hook templates and CI guardrails")
        return 1

    print("Commit message validation passed.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())