From db88a34b3e678611188d4ea721cfbc30e3ee8ca8 Mon Sep 17 00:00:00 2001
From: tegwick
Date: Mon, 22 Jun 2026 17:59:55 +0200
Subject: [PATCH] CUST-WP-0050 follow-up: human review, push tooling, SSH inventory

Add human-review script for 13 high-blast-radius repos, bulk-push helper,
and SSH-based Gitea inventory probe. Update exclusion list with SSH-verified
absent slugs; marki-docx now classified and registered.
---
 .../repo-classification.exclusions.yaml       |  30 +--
 tools/gitea_ssh_inventory.py                  |  62 +++++
 tools/human_review_classifications.py         | 224 ++++++++++++++++++
 tools/push_repo_classifications.sh            |  56 +++++
 4 files changed, 355 insertions(+), 17 deletions(-)
 create mode 100644 tools/gitea_ssh_inventory.py
 create mode 100644 tools/human_review_classifications.py
 create mode 100755 tools/push_repo_classifications.sh

diff --git a/canon/standards/repo-classification.exclusions.yaml b/canon/standards/repo-classification.exclusions.yaml
index 20d3673..29c4f03 100644
--- a/canon/standards/repo-classification.exclusions.yaml
+++ b/canon/standards/repo-classification.exclusions.yaml
@@ -4,18 +4,18 @@
 #
 # Validate additions against canon/standards/repo-classification-standard_v1.0.md.
 
-version: "1.0"
+version: "1.1"
 updated: "2026-06-22"
 
 exclusions:
   # Forks and personal repos — not ecosystem inventory.
   - slug: tegwick/the-custodian
     gitea_path: tegwick/the-custodian
-    reason: fork of the-custodian; not a managed ecosystem repo
+    reason: fork path not found on Gitea (SSH verified 2026-06-22)
 
   - slug: python-snake
     gitea_path: lando_worsch/python-snake
-    reason: personal / non-ecosystem repo
+    reason: personal / non-ecosystem repo (exists on Gitea; excluded by policy)
 
   # Archived or collapsed hub registrations — superseded by another slug.
   - slug: markitect-project
@@ -31,7 +31,7 @@ exclusions:
     reason: archived duplicate; collapsed into vergabe-teilnahme
 
   - slug: test_domain_v2
-    reason: archived test domain; not active portfolio
+    reason: archived test domain; not present on Gitea coulomb org (SSH verified)
 
   # Local-only templates / sandboxes — not product inventory.
   - slug: hub-core-seed
@@ -43,31 +43,27 @@ exclusions:
   - slug: .nvm
     reason: Node version manager checkout; not a coulomb project repo
 
-  # Gitea repos referenced in portfolio review but not present / not cloned locally
-  # at T11 execution time — classify when a checkout exists or Gitea inventory confirms.
+  # Portfolio-review slugs with no matching coulomb/* repo on Gitea (SSH verified 2026-06-22).
   - slug: binect-chrome
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: binect-js
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: direkt-vermittlung-de
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: polycode-sim
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: ralph-workplan
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: tele-mcp
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: testdrive-jsui
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: timeline-svg
-    reason: not cloned locally; pending inventory confirmation
-
-  - slug: marki-docx
-    reason: registered in hub; no local checkout at T11 — classify on next clone
\ No newline at end of file
+    reason: not present on Gitea coulomb org; likely renamed or removed
\ No newline at end of file
diff --git a/tools/gitea_ssh_inventory.py b/tools/gitea_ssh_inventory.py
new file mode 100644
index 0000000..55fbd87
--- /dev/null
+++ b/tools/gitea_ssh_inventory.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""Compare State Hub registered repos against Gitea SSH reachability (no HTTP token)."""
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import urllib.request
+
+API_BASE = "http://127.0.0.1:8000"
+GITEA_REMOTE = "gitea-remote"
+# Non-interactive SSH: bounded connect time, never prompt for credentials.
+GIT_SSH_COMMAND = "ssh -o ConnectTimeout=8 -o BatchMode=yes"
+
+
+def hub_repos() -> list[dict]:
+    """Return the JSON payload served by the State Hub ``/repos/`` endpoint."""
+    with urllib.request.urlopen(f"{API_BASE}/repos/", timeout=30) as resp:
+        return json.load(resp)
+
+
+def gitea_exists(path: str) -> bool:
+    """Return True if ``git ls-remote`` over SSH lists a HEAD for *path*.
+
+    Any failure (missing repo, auth error, unreachable host) yields False,
+    so a "missing" result is only meaningful while SSH itself is working.
+    """
+    proc = subprocess.run(
+        ["git", "ls-remote", f"{GITEA_REMOTE}:{path}.git", "HEAD"],
+        capture_output=True,
+        text=True,
+        env={**os.environ, "GIT_SSH_COMMAND": GIT_SSH_COMMAND},
+    )
+    return proc.returncode == 0 and any(line.strip() for line in proc.stdout.splitlines())
+
+
+def main() -> int:
+    """Print which active State Hub repos are reachable as ``coulomb/<slug>``."""
+    repos = hub_repos()
+    active = [r for r in repos if r.get("status") == "active"]
+    matched: list[str] = []
+    missing: list[str] = []
+
+    for repo in sorted(active, key=lambda r: r["slug"]):
+        slug = repo["slug"]
+        if gitea_exists(f"coulomb/{slug}"):
+            matched.append(slug)
+        else:
+            missing.append(slug)
+
+    print(f"State Hub active repos: {len(active)}")
+    print(f"Gitea SSH reachable (coulomb/): {len(matched)}")
+    print(f"Hub-only (no coulomb/ on Gitea SSH): {len(missing)}")
+    if missing:
+        print("\nMissing on Gitea:")
+        for slug in missing:
+            print(f"  - {slug}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/human_review_classifications.py b/tools/human_review_classifications.py
new file mode 100644
index 0000000..741f1b6
--- /dev/null
+++ b/tools/human_review_classifications.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+"""Apply human-reviewed classification corrections (CUST-WP-0050 follow-up)."""
+from __future__ import annotations
+
+import subprocess
+import sys
+from pathlib import Path
+
+import yaml
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+VALIDATOR = REPO_ROOT / "tools" / "validate_repo_classification.py"
+HOME = Path.home()
+CLASSIFICATION_FILE = ".repo-classification.yaml"
+
+# Curated human-reviewed classifications for high-blast-radius portfolio anchors.
+HUMAN_REVIEWS: dict[str, dict] = {
+    "helix-forge": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": [
+            "platform",
+            "capability-registry",
+            "coordination",
+            "knowledge",
+            "product-development",
+        ],
+        "business_stake": ["product", "technology", "execution", "automation", "intelligence"],
+        "business_mechanics": ["intention", "coordination", "operation", "adaptation"],
+        "notes": "Capability development platform; standard §13.1 — human confirmed.",
+    },
+    "reuse-surface": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": ["capability-registry", "discovery", "reuse", "maturity", "evidence"],
+        "business_stake": ["technology", "product", "intelligence", "automation"],
+        "business_mechanics": ["intention", "control", "adaptation"],
+        "notes": "Reuse discovery surface; standard §13.7 — human confirmed.",
+    },
+    "coordination-engine": {
+        "category": "product",
+        "domain": "communication",
+        "secondary_domains": ["infotech", "agents"],
+        "capability_tags": ["coordination", "workflow", "orchestration", "evidence", "platform"],
+        "business_stake": ["product", "technology", "operations", "automation"],
+        "business_mechanics": ["coordination", "operation", "adaptation"],
+        "notes": "Goal-driven coordination framework; human confirmed.",
+    },
+    "markitect-main": {
+        "category": "product",
+        "domain": "communication",
+        "secondary_domains": ["infotech", "agents"],
+        "capability_tags": ["knowledge", "documentation", "product-development", "platform"],
+        "business_stake": ["product", "technology", "execution"],
+        "business_mechanics": ["intention", "coordination", "operation", "adaptation"],
+        "notes": "Markitect successor to archived markitect-project; human confirmed.",
+    },
+    "citation-evidence": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["communication", "government"],
+        "capability_tags": ["citations", "evidence", "knowledge", "traceability", "source-management"],
+        "business_stake": ["intelligence", "legal", "product", "technology"],
+        "business_mechanics": ["control", "coordination", "adaptation"],
+        "notes": "Citation/evidence product; standard §13.5 — human confirmed.",
+    },
+    "adaptive-pricing": {
+        "category": "product",
+        "domain": "financials",
+        "secondary_domains": ["infotech", "agents"],
+        "capability_tags": ["pricing", "monetization", "lifecycle", "decision-support", "product-development"],
+        "business_stake": ["finance", "product", "sales", "intelligence", "automation"],
+        "business_mechanics": ["intention", "control", "adaptation"],
+        "notes": "Adaptive pricing product; standard §13.6 — human confirmed.",
+    },
+    "identity-canon": {
+        "category": "research",
+        "domain": "infotech",
+        "secondary_domains": ["government"],
+        "capability_tags": ["identity", "access-control", "terminology", "canon", "governance"],
+        "business_stake": ["technology", "legal", "operations", "intelligence"],
+        "business_mechanics": ["intention", "control", "adaptation"],
+        "notes": "Identity canon; standard §13.3 — human confirmed.",
+    },
+    "net-kingdom": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": ["security", "identity", "platform", "operations", "access-control"],
+        "business_stake": ["technology", "operations", "legal", "automation"],
+        "business_mechanics": ["control", "operation", "adaptation"],
+        "notes": "NetKingdom security/identity platform; standard §13.4 — human confirmed.",
+    },
+    "audit-core": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": ["audit", "traceability", "security", "governance", "operations"],
+        "business_stake": ["technology", "operations", "legal", "automation"],
+        "business_mechanics": ["control", "operation"],
+        "notes": "Multi-tenant audit emit capability; human confirmed.",
+    },
+    "key-cape": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["communication"],
+        "capability_tags": ["identity", "access-control", "security", "platform", "operations"],
+        "business_stake": ["technology", "operations", "legal", "product"],
+        "business_mechanics": ["control", "operation", "adaptation"],
+        "notes": "NetKingdom IAM Profile lightweight mode (Authelia/LLDAP/privacyIDEA); human corrected domain from communication→infotech.",
+    },
+    "flex-auth": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["government"],
+        "capability_tags": ["identity", "access-control", "policy", "governance", "audit"],
+        "business_stake": ["technology", "legal", "operations", "product"],
+        "business_mechanics": ["control", "coordination", "adaptation"],
+        "notes": "Policy-as-code authorization registry; human corrected domain from communication→infotech.",
+    },
+    "ops-hub": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": ["operations", "platform", "observability", "coordination", "governance"],
+        "business_stake": ["operations", "technology", "automation"],
+        "business_mechanics": ["coordination", "operation", "control"],
+        "notes": "Inter-Hub operations extension (environments, incidents, runbooks); human corrected category project→tooling.",
+    },
+    "railiance-platform": {
+        "category": "tooling",
+        "domain": "financials",
+        "secondary_domains": ["infotech"],
+        "capability_tags": ["platform", "operations", "configuration", "governance"],
+        "business_stake": ["finance", "technology", "operations"],
+        "business_mechanics": ["control", "operation", "coordination"],
+        "notes": "Railiance platform substrate; human corrected category project→tooling.",
+    },
+}
+
+
+def build_block(slug: str, data: dict) -> dict:
+    """Wrap one curated review in the ``repo_classification`` envelope.
+
+    *slug* is currently unused. *data* is left unmodified; its ``notes``
+    entry, when non-empty, is emitted as the last key of the block.
+    """
+    fields = {key: value for key, value in data.items() if key != "notes"}
+    block = {
+        "repo_classification": {
+            "standard": "Repo Classification Standard",
+            "version": "1.0",
+            "classified_at": "2026-06-22",
+            "classified_by": "human",
+            **fields,
+        }
+    }
+    notes = data.get("notes")
+    if notes:
+        block["repo_classification"]["notes"] = notes
+    return block
+
+
+def main() -> int:
+    """Write, validate and commit each curated classification.
+
+    Repos are expected as checkouts directly under the home directory; slugs
+    without one are skipped. Returns 1 as soon as a written file fails
+    validation, otherwise 0.
+    """
+    updated: list[str] = []
+    for slug, data in HUMAN_REVIEWS.items():
+        repo_path = HOME / slug
+        target = repo_path / CLASSIFICATION_FILE
+        if not repo_path.is_dir():
+            print(f"skip {slug}: no checkout", file=sys.stderr)
+            continue
+        payload = build_block(slug, data)
+        # The notes contain non-ASCII characters (§, →, —); pin the encoding so
+        # the written file does not depend on the locale.
+        target.write_text(
+            yaml.dump(payload, sort_keys=False, allow_unicode=True),
+            encoding="utf-8",
+        )
+        proc = subprocess.run(
+            [sys.executable, str(VALIDATOR), str(target)],
+            capture_output=True,
+            text=True,
+        )
+        if proc.returncode != 0:
+            print(f"validation failed for {slug}", file=sys.stderr)
+            print(proc.stdout, proc.stderr, file=sys.stderr)
+            return 1
+        subprocess.run(["git", "add", CLASSIFICATION_FILE], cwd=repo_path, check=True)
+        # Limit both the staged-change check and the commit to the
+        # classification file so unrelated staged work is never swept in.
+        diff = subprocess.run(
+            ["git", "diff", "--cached", "--quiet", "--", CLASSIFICATION_FILE],
+            cwd=repo_path,
+        )
+        if diff.returncode != 0:
+            subprocess.run(
+                [
+                    "git",
+                    "commit",
+                    "-m",
+                    "Human-review .repo-classification.yaml (CUST-WP-0050 follow-up)",
+                    "--",
+                    CLASSIFICATION_FILE,
+                ],
+                cwd=repo_path,
+                check=True,
+            )
+            updated.append(slug)
+    print(f"Human-reviewed: {len(updated)}")
+    for slug in updated:
+        print(f"  ✓ {slug}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/push_repo_classifications.sh b/tools/push_repo_classifications.sh
new file mode 100755
index 0000000..32c5f0e
--- /dev/null
+++ b/tools/push_repo_classifications.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Push local ecosystem repos that carry a .repo-classification.yaml whose
+# content differs from the branch's upstream. Each push goes to that
+# upstream and publishes every commit the branch is ahead by.
+set -euo pipefail
+
+HOME_REPOS="${HOME}"
+CLASSIFICATION_FILE=".repo-classification.yaml"
+PUSHED=0
+SKIPPED=0
+FAILED=0
+FAILED_NAMES=()
+
+for dir in "$HOME_REPOS"/*/; do
+  [ -d "$dir/.git" ] || continue
+  [ -f "$dir/$CLASSIFICATION_FILE" ] || continue
+
+  name=$(basename "$dir")
+  cd "$dir"
+
+  branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || continue
+  ahead=$(git rev-list --count "@{u}..HEAD" 2>/dev/null || echo "no-upstream")
+
+  if [ "$ahead" = "no-upstream" ] || [ "$ahead" = "0" ]; then
+    SKIPPED=$((SKIPPED + 1))
+    continue
+  fi
+
+  # Unpushed commits that leave the classification file identical to the
+  # upstream copy are outside this tool's scope.
+  if git diff --quiet "@{u}" HEAD -- "$CLASSIFICATION_FILE"; then
+    SKIPPED=$((SKIPPED + 1))
+    continue
+  fi
+
+  # Push to the upstream the ahead-count was measured against, which is not
+  # necessarily a remote called "origin"; fall back to origin/<branch>.
+  remote=$(git config --get "branch.${branch}.remote" || echo "origin")
+  merge_ref=$(git config --get "branch.${branch}.merge" || echo "refs/heads/${branch}")
+
+  echo "Pushing $name ($branch, $ahead commit(s))..."
+  if git push "$remote" "HEAD:${merge_ref}" 2>&1; then
+    PUSHED=$((PUSHED + 1))
+  else
+    FAILED=$((FAILED + 1))
+    FAILED_NAMES+=("$name")
+  fi
+done
+
+echo ""
+echo "Summary: pushed=$PUSHED skipped=$SKIPPED failed=$FAILED"
+if [ "${#FAILED_NAMES[@]}" -gt 0 ]; then
+  echo "Failed:"
+  printf '  %s\n' "${FAILED_NAMES[@]}"
+  exit 1
+fi