#!/usr/bin/env python3 """First-pass .repo-classification.yaml author for CUST-WP-0050 T11. Writes agent-classified files for local git checkouts, validates against canon/standards/repo-classification.allowed.yaml, and optionally commits. Skips repos that already carry human-reviewed classifications and slugs on the exclusion list. """ from __future__ import annotations import argparse import json import subprocess import sys import urllib.request from datetime import date from pathlib import Path import yaml REPO_ROOT = Path(__file__).resolve().parent.parent ALLOWED_PATH = REPO_ROOT / "canon" / "standards" / "repo-classification.allowed.yaml" EXCLUSIONS_PATH = REPO_ROOT / "canon" / "standards" / "repo-classification.exclusions.yaml" VALIDATOR = REPO_ROOT / "tools" / "validate_repo_classification.py" HOME = Path.home() API_BASE = "http://127.0.0.1:8000" CLASSIFIED_AT = date.today().isoformat() # Curated overrides — standard §13 examples and T11 first-pass judgments. OVERRIDES: dict[str, dict] = { "helix-forge": { "category": "product", "domain": "infotech", "secondary_domains": ["agents"], "capability_tags": [ "platform", "capability-registry", "coordination", "knowledge", "product-development", ], "business_stake": ["product", "technology", "execution", "automation", "intelligence"], "business_mechanics": ["intention", "coordination", "operation", "adaptation"], "notes": "Capability development platform; standard §13.1 example.", }, "identity-canon": { "category": "research", "domain": "infotech", "secondary_domains": ["government"], "capability_tags": ["identity", "access-control", "terminology", "canon", "governance"], "business_stake": ["technology", "legal", "operations", "intelligence"], "business_mechanics": ["intention", "control", "adaptation"], "notes": "Identity terminology and canon; standard §13.3 example.", }, "net-kingdom": { "category": "product", "domain": "infotech", "secondary_domains": [], "capability_tags": ["security", "identity", "platform", "operations", "access-control"], "business_stake": ["technology", "operations", "legal", "automation"], "business_mechanics": ["control", "operation", "adaptation"], "notes": "NetKingdom security/identity platform; standard §13.4 example.", }, "citation-evidence": { "category": "product", "domain": "infotech", "secondary_domains": ["communication", "government"], "capability_tags": ["citations", "evidence", "knowledge", "traceability", "source-management"], "business_stake": ["intelligence", "legal", "product", "technology"], "business_mechanics": ["control", "coordination", "adaptation"], "notes": "Citation and evidence product; standard §13.5 example.", }, "adaptive-pricing": { "category": "product", "domain": "financials", "secondary_domains": ["infotech", "agents"], "capability_tags": ["pricing", "monetization", "lifecycle", "decision-support", "product-development"], "business_stake": ["finance", "product", "sales", "intelligence", "automation"], "business_mechanics": ["intention", "control", "adaptation"], "notes": "Adaptive pricing product; standard §13.6 example.", }, "reuse-surface": { "category": "product", "domain": "infotech", "secondary_domains": ["agents"], "capability_tags": ["capability-registry", "discovery", "reuse", "maturity", "evidence"], "business_stake": ["technology", "product", "intelligence", "automation"], "business_mechanics": ["intention", "control", "adaptation"], "notes": "Reuse discovery surface; standard §13.7 example.", }, "audit-core": { "category": "tooling", "domain": "infotech", "secondary_domains": [], "capability_tags": ["audit", "traceability", "security", "governance", "operations"], "business_stake": ["technology", "operations", "legal", "automation"], "business_mechanics": ["control", "operation"], "notes": "Multi-tenant audit emit capability for platform bootstrap wiring.", }, "whynot-design": { "category": "product", "domain": "consumer", "secondary_domains": ["communication"], "capability_tags": ["design-system", "documentation", "product-development", "experience"], "business_stake": ["product", "experience", "technology"], "business_mechanics": ["intention", "coordination", "adaptation"], "notes": "whynot visual language — tokens, CSS, and web components for prototype artefacts.", }, "coordination-engine": { "category": "product", "domain": "communication", "secondary_domains": ["infotech", "agents"], "capability_tags": ["coordination", "workflow", "orchestration", "evidence", "platform"], "business_stake": ["product", "technology", "operations", "automation"], "business_mechanics": ["coordination", "operation", "adaptation"], "notes": "Goal-driven digital coordination framework and adapter runtime.", }, "human-resources": { "category": "research", "domain": "consumer", "secondary_domains": ["health"], "capability_tags": ["knowledge", "documentation", "product-development"], "business_stake": ["people", "product", "experience"], "business_mechanics": ["intention", "adaptation"], "notes": "Research toward optimal human performance.", }, "repo-seed": { "category": "tooling", "domain": "infotech", "secondary_domains": [], "capability_tags": ["platform", "configuration", "documentation"], "business_stake": ["technology", "execution"], "business_mechanics": ["operation"], "notes": "Git template for bootstrapping coulomb projects.", }, "tegwick-control": { "category": "research", "domain": "consumer", "secondary_domains": ["infotech"], "capability_tags": ["coordination", "governance", "documentation", "knowledge"], "business_stake": ["people", "operations", "intelligence"], "business_mechanics": ["intention", "coordination", "adaptation"], "notes": "Personal control repository for life/projects landscape.", }, "whynot-control": { "category": "research", "domain": "consumer", "secondary_domains": ["communication"], "capability_tags": ["product-development", "knowledge", "coordination", "documentation"], "business_stake": ["product", "experience", "intelligence"], "business_mechanics": ["intention", "coordination", "adaptation"], "notes": "whynot prototype and market-signal control repository.", }, "markitect-main": { "category": "product", "domain": "communication", "secondary_domains": ["infotech", "agents"], "capability_tags": ["knowledge", "documentation", "product-development", "platform"], "business_stake": ["product", "technology", "execution"], "business_mechanics": ["intention", "coordination", "operation", "adaptation"], "notes": "Markitect main product repo; successor to archived markitect-project.", }, "vantage-point": { "category": "research", "domain": "infotech", "secondary_domains": [], "capability_tags": ["knowledge", "analytics", "platform", "documentation"], "business_stake": ["technology", "intelligence", "product"], "business_mechanics": ["intention", "adaptation"], "notes": "Network-based graph model exploration and dependency reasoning framework.", }, } DOMAIN_DEFAULT_STAKE: dict[str, list[str]] = { "infotech": ["technology", "product", "operations"], "financials": ["finance", "technology", "operations"], "communication": ["product", "experience", "technology"], "consumer": ["product", "experience"], "agents": ["technology", "automation", "product"], "government": ["legal", "operations", "technology"], "health": ["product", "experience", "operations"], "realestate": ["finance", "operations"], } SLUG_TAG_HINTS: dict[str, list[str]] = { "citation": ["citations", "evidence", "knowledge"], "evidence": ["evidence", "traceability", "source-management"], "railiance": ["platform", "operations"], "markitect": ["knowledge", "documentation"], "marki": ["knowledge", "documentation"], "ops-": ["operations", "platform"], "canon": ["canon", "knowledge", "governance"], "flex-auth": ["identity", "access-control", "policy"], "key-cape": ["identity", "access-control", "security"], "shard-wiki": ["knowledge", "documentation"], "vergabe": ["procurement", "governance"], "agentic": ["automation", "orchestration"], "repo-scoping": ["governance", "policy", "coordination"], } def load_excluded_slugs() -> set[str]: with EXCLUSIONS_PATH.open() as fh: doc = yaml.safe_load(fh) return {entry["slug"].split("/")[-1] for entry in doc.get("exclusions", [])} def fetch_hub_repos() -> dict[str, dict]: with urllib.request.urlopen(f"{API_BASE}/repos/", timeout=30) as resp: repos = json.load(resp) return {r["slug"]: r for r in repos} def local_git_repos() -> dict[str, Path]: found: dict[str, Path] = {} for child in HOME.iterdir(): if child.name.startswith(".") or not child.is_dir(): continue if (child / ".git").is_dir(): found[child.name] = child return found def _tag_hints(slug: str) -> list[str]: tags: list[str] = [] for needle, hint_tags in SLUG_TAG_HINTS.items(): if needle in slug: for tag in hint_tags: if tag not in tags: tags.append(tag) return tags def _enrich_from_hub(slug: str, hub: dict | None) -> dict: if hub: category = hub.get("category") or "project" domain = hub.get("domain_slug") or "infotech" secondary = hub.get("secondary_domains") or [] tags = list(hub.get("capability_tags") or []) stake = list(hub.get("business_stake") or []) mechanics = list(hub.get("business_mechanics") or []) else: category = "project" domain = "infotech" secondary = [] tags = [] stake = [] mechanics = [] for tag in _tag_hints(slug): if tag not in tags: tags.append(tag) if not stake: stake = list(DOMAIN_DEFAULT_STAKE.get(domain, ["technology", "product"])) if not mechanics: mechanics = ["coordination", "operation"] return { "category": category, "domain": domain, "secondary_domains": secondary, "capability_tags": tags, "business_stake": stake, "business_mechanics": mechanics, "notes": f"First-pass agent classification (CUST-WP-0050 T11); derived from hub migration row.", } def build_classification(slug: str, hub: dict | None) -> dict: if slug in OVERRIDES: data = dict(OVERRIDES[slug]) else: data = _enrich_from_hub(slug, hub) data.pop("notes", None) block = { "repo_classification": { "standard": "Repo Classification Standard", "version": "1.0", "classified_at": CLASSIFIED_AT, "classified_by": "agent", **{k: v for k, v in data.items() if k != "notes"}, } } if "notes" in OVERRIDES.get(slug, {}) or "notes" in data: note = OVERRIDES.get(slug, {}).get("notes") or data.get("notes") if note: block["repo_classification"]["notes"] = note return block def render_yaml(block: dict) -> str: return yaml.dump(block, sort_keys=False, allow_unicode=True, default_flow_style=False) def validate_file(path: Path) -> bool: proc = subprocess.run( [sys.executable, str(VALIDATOR), str(path)], capture_output=True, text=True, ) if proc.returncode != 0: print(proc.stdout) print(proc.stderr, file=sys.stderr) return proc.returncode == 0 def maybe_commit(repo_path: Path, dry_run: bool) -> None: if dry_run: return subprocess.run(["git", "add", ".repo-classification.yaml"], cwd=repo_path, check=True) status = subprocess.run( ["git", "diff", "--cached", "--quiet"], cwd=repo_path, ) if status.returncode != 0: subprocess.run( [ "git", "commit", "-m", "Add .repo-classification.yaml (CUST-WP-0050 T11 agent first-pass)", ], cwd=repo_path, check=True, ) def main() -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--dry-run", action="store_true", help="Print actions only") parser.add_argument("--no-commit", action="store_true", help="Write files but skip git commit") args = parser.parse_args() excluded = load_excluded_slugs() hub_repos = fetch_hub_repos() local_repos = local_git_repos() targets = sorted(set(local_repos) - excluded) written: list[str] = [] skipped_human: list[str] = [] failed: list[str] = [] for slug in targets: repo_path = local_repos[slug] target = repo_path / ".repo-classification.yaml" if target.exists(): with target.open() as fh: existing = yaml.safe_load(fh) or {} if existing.get("repo_classification", {}).get("classified_by") == "human": skipped_human.append(slug) continue block = build_classification(slug, hub_repos.get(slug)) content = render_yaml(block) if args.dry_run: print(f"[dry-run] would write {target}") continue target.write_text(content) if not validate_file(target): failed.append(slug) continue if not args.no_commit: maybe_commit(repo_path, dry_run=False) written.append(slug) print(f"Written: {len(written)}") for slug in written: print(f" + {slug}") print(f"Skipped (human-reviewed): {len(skipped_human)}") for slug in skipped_human: print(f" = {slug}") if failed: print(f"Failed validation: {len(failed)}", file=sys.stderr) for slug in failed: print(f" ! {slug}", file=sys.stderr) return 1 return 0 if __name__ == "__main__": raise SystemExit(main())