From db88a34b3e678611188d4ea721cfbc30e3ee8ca8 Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Mon, 22 Jun 2026 17:59:55 +0200
Subject: [PATCH] CUST-WP-0050 follow-up: human review, push tooling, SSH
 inventory

Add human-review script for 13 high-blast-radius repos, bulk-push helper,
and SSH-based Gitea inventory probe. Update exclusion list with SSH-verified
absent slugs; marki-docx now classified and registered.
---
 .../repo-classification.exclusions.yaml       |  30 ++-
 tools/gitea_ssh_inventory.py                  |  58 ++++++
 tools/human_review_classifications.py         | 194 ++++++++++++++++++
 tools/push_repo_classifications.sh            |  45 ++++
 4 files changed, 310 insertions(+), 17 deletions(-)
 create mode 100644 tools/gitea_ssh_inventory.py
 create mode 100644 tools/human_review_classifications.py
 create mode 100755 tools/push_repo_classifications.sh

diff --git a/canon/standards/repo-classification.exclusions.yaml b/canon/standards/repo-classification.exclusions.yaml
index 20d3673..29c4f03 100644
--- a/canon/standards/repo-classification.exclusions.yaml
+++ b/canon/standards/repo-classification.exclusions.yaml
@@ -4,18 +4,18 @@
 #
 # Validate additions against canon/standards/repo-classification-standard_v1.0.md.
 
-version: "1.0"
+version: "1.1"
 updated: "2026-06-22"
 
 exclusions:
   # Forks and personal repos — not ecosystem inventory.
   - slug: tegwick/the-custodian
     gitea_path: tegwick/the-custodian
-    reason: fork of the-custodian; not a managed ecosystem repo
+    reason: fork path not found on Gitea (SSH verified 2026-06-22)
 
   - slug: python-snake
     gitea_path: lando_worsch/python-snake
-    reason: personal / non-ecosystem repo
+    reason: personal / non-ecosystem repo (exists on Gitea; excluded by policy)
 
   # Archived or collapsed hub registrations — superseded by another slug.
   - slug: markitect-project
@@ -31,7 +31,7 @@ exclusions:
     reason: archived duplicate; collapsed into vergabe-teilnahme
 
   - slug: test_domain_v2
-    reason: archived test domain; not active portfolio
+    reason: archived test domain; not present on Gitea coulomb org (SSH verified 2026-06-22)
 
   # Local-only templates / sandboxes — not product inventory.
   - slug: hub-core-seed
@@ -43,31 +43,27 @@ exclusions:
   - slug: .nvm
     reason: Node version manager checkout; not a coulomb project repo
 
-  # Gitea repos referenced in portfolio review but not present / not cloned locally
-  # at T11 execution time — classify when a checkout exists or Gitea inventory confirms.
+  # Portfolio-review slugs with no matching coulomb/* repo on Gitea (SSH verified 2026-06-22).
   - slug: binect-chrome
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: binect-js
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: direkt-vermittlung-de
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: polycode-sim
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: ralph-workplan
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: tele-mcp
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: testdrive-jsui
-    reason: not cloned locally; pending inventory confirmation
+    reason: not present on Gitea coulomb org; likely renamed or removed
 
   - slug: timeline-svg
-    reason: not cloned locally; pending inventory confirmation
-
-  - slug: marki-docx
-    reason: registered in hub; no local checkout at T11 — classify on next clone
\ No newline at end of file
+    reason: not present on Gitea coulomb org; likely renamed or removed
\ No newline at end of file
diff --git a/tools/gitea_ssh_inventory.py b/tools/gitea_ssh_inventory.py
new file mode 100644
index 0000000..55fbd87
--- /dev/null
+++ b/tools/gitea_ssh_inventory.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""Compare State Hub registered repos against Gitea SSH reachability (no HTTP token)."""
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+import urllib.request
+
+API_BASE = "http://127.0.0.1:8000"
+GITEA_REMOTE = "gitea-remote"
+
+
+def hub_repos() -> list[dict]:  # fetch the repo list from the State Hub at API_BASE
+    with urllib.request.urlopen(f"{API_BASE}/repos/", timeout=30) as resp:  # GET with a 30 s timeout
+        return json.load(resp)  # parsed JSON body, returned as-is (no shape validation here)
+
+
+def gitea_exists(path: str) -> bool:
+    """Return True if ``git ls-remote`` lists HEAD for *path*; any git/SSH failure also yields False."""
+    import os  # function-local: the module's import block does not provide os
+
+    # BatchMode disables interactive prompts; ConnectTimeout bounds an unreachable host.
+    env = {**os.environ, "GIT_SSH_COMMAND": "ssh -o ConnectTimeout=8 -o BatchMode=yes"}
+    proc = subprocess.run(
+        ["git", "ls-remote", f"{GITEA_REMOTE}:{path}.git", "HEAD"],
+        capture_output=True,
+        text=True,
+        env=env,
+    )
+    return proc.returncode == 0 and any(line.strip() for line in proc.stdout.splitlines())
+
+
+def main() -> int:
+    """Report which active State Hub repos resolve as coulomb/<slug> over Gitea SSH."""
+    repos = hub_repos()
+    # Only repos the hub marks as active are probed.
+    active = [entry for entry in repos if entry.get("status") == "active"]
+    present: list[str] = []
+    absent: list[str] = []
+
+    # Probe in slug order so the report is stable between runs.
+    for slug in sorted(entry["slug"] for entry in active):
+        bucket = present if gitea_exists(f"coulomb/{slug}") else absent
+        bucket.append(slug)
+
+    print(f"State Hub active repos: {len(active)}")
+    print(f"Gitea SSH reachable (coulomb/<slug>): {len(present)}")
+    print(f"Hub-only (no coulomb/<slug> on Gitea SSH): {len(absent)}")
+    if absent:
+        print("\nMissing on Gitea:")
+        for slug in absent:
+            print(f"  - {slug}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
\ No newline at end of file
diff --git a/tools/human_review_classifications.py b/tools/human_review_classifications.py
new file mode 100644
index 0000000..741f1b6
--- /dev/null
+++ b/tools/human_review_classifications.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+"""Apply human-reviewed classification corrections (CUST-WP-0050 follow-up)."""
+from __future__ import annotations
+
+import subprocess
+import sys
+from pathlib import Path
+
+import yaml
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+VALIDATOR = REPO_ROOT / "tools" / "validate_repo_classification.py"
+HOME = Path.home()
+
+# Human-reviewed classifications for high-blast-radius portfolio anchors, keyed by repo slug (the checkout directory name under HOME).
+HUMAN_REVIEWS: dict[str, dict] = {
+    "helix-forge": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": [
+            "platform",
+            "capability-registry",
+            "coordination",
+            "knowledge",
+            "product-development",
+        ],
+        "business_stake": ["product", "technology", "execution", "automation", "intelligence"],
+        "business_mechanics": ["intention", "coordination", "operation", "adaptation"],
+        "notes": "Capability development platform; standard §13.1 — human confirmed.",
+    },
+    "reuse-surface": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": ["capability-registry", "discovery", "reuse", "maturity", "evidence"],
+        "business_stake": ["technology", "product", "intelligence", "automation"],
+        "business_mechanics": ["intention", "control", "adaptation"],
+        "notes": "Reuse discovery surface; standard §13.7 — human confirmed.",
+    },
+    "coordination-engine": {
+        "category": "product",
+        "domain": "communication",
+        "secondary_domains": ["infotech", "agents"],
+        "capability_tags": ["coordination", "workflow", "orchestration", "evidence", "platform"],
+        "business_stake": ["product", "technology", "operations", "automation"],
+        "business_mechanics": ["coordination", "operation", "adaptation"],
+        "notes": "Goal-driven coordination framework; human confirmed.",
+    },
+    "markitect-main": {
+        "category": "product",
+        "domain": "communication",
+        "secondary_domains": ["infotech", "agents"],
+        "capability_tags": ["knowledge", "documentation", "product-development", "platform"],
+        "business_stake": ["product", "technology", "execution"],
+        "business_mechanics": ["intention", "coordination", "operation", "adaptation"],
+        "notes": "Markitect successor to archived markitect-project; human confirmed.",
+    },
+    "citation-evidence": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["communication", "government"],
+        "capability_tags": ["citations", "evidence", "knowledge", "traceability", "source-management"],
+        "business_stake": ["intelligence", "legal", "product", "technology"],
+        "business_mechanics": ["control", "coordination", "adaptation"],
+        "notes": "Citation/evidence product; standard §13.5 — human confirmed.",
+    },
+    "adaptive-pricing": {
+        "category": "product",
+        "domain": "financials",
+        "secondary_domains": ["infotech", "agents"],
+        "capability_tags": ["pricing", "monetization", "lifecycle", "decision-support", "product-development"],
+        "business_stake": ["finance", "product", "sales", "intelligence", "automation"],
+        "business_mechanics": ["intention", "control", "adaptation"],
+        "notes": "Adaptive pricing product; standard §13.6 — human confirmed.",
+    },
+    "identity-canon": {
+        "category": "research",
+        "domain": "infotech",
+        "secondary_domains": ["government"],
+        "capability_tags": ["identity", "access-control", "terminology", "canon", "governance"],
+        "business_stake": ["technology", "legal", "operations", "intelligence"],
+        "business_mechanics": ["intention", "control", "adaptation"],
+        "notes": "Identity canon; standard §13.3 — human confirmed.",
+    },
+    "net-kingdom": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": ["security", "identity", "platform", "operations", "access-control"],
+        "business_stake": ["technology", "operations", "legal", "automation"],
+        "business_mechanics": ["control", "operation", "adaptation"],
+        "notes": "NetKingdom security/identity platform; standard §13.4 — human confirmed.",
+    },
+    "audit-core": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": ["audit", "traceability", "security", "governance", "operations"],
+        "business_stake": ["technology", "operations", "legal", "automation"],
+        "business_mechanics": ["control", "operation"],
+        "notes": "Multi-tenant audit emit capability; human confirmed.",
+    },
+    "key-cape": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["communication"],
+        "capability_tags": ["identity", "access-control", "security", "platform", "operations"],
+        "business_stake": ["technology", "operations", "legal", "product"],
+        "business_mechanics": ["control", "operation", "adaptation"],
+        "notes": "NetKingdom IAM Profile lightweight mode (Authelia/LLDAP/privacyIDEA); human corrected domain from communication→infotech.",
+    },
+    "flex-auth": {
+        "category": "product",
+        "domain": "infotech",
+        "secondary_domains": ["government"],
+        "capability_tags": ["identity", "access-control", "policy", "governance", "audit"],
+        "business_stake": ["technology", "legal", "operations", "product"],
+        "business_mechanics": ["control", "coordination", "adaptation"],
+        "notes": "Policy-as-code authorization registry; human corrected domain from communication→infotech.",
+    },
+    "ops-hub": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": ["operations", "platform", "observability", "coordination", "governance"],
+        "business_stake": ["operations", "technology", "automation"],
+        "business_mechanics": ["coordination", "operation", "control"],
+        "notes": "Inter-Hub operations extension (environments, incidents, runbooks); human corrected category project→tooling.",
+    },
+    "railiance-platform": {
+        "category": "tooling",
+        "domain": "financials",
+        "secondary_domains": ["infotech"],
+        "capability_tags": ["platform", "operations", "configuration", "governance"],
+        "business_stake": ["finance", "technology", "operations"],
+        "business_mechanics": ["control", "operation", "coordination"],
+        "notes": "Railiance platform substrate; human corrected category project→tooling.",
+    },
+}
+
+
+def build_block(slug: str, data: dict) -> dict:
+    """Wrap *data* in a ``repo_classification`` block; *data* is not mutated, *slug* is unused."""
+    notes = data.get("notes")
+    # Fixed provenance header first, then the reviewed fields in their given order.
+    body = {
+        "standard": "Repo Classification Standard",
+        "version": "1.0",
+        "classified_at": "2026-06-22",
+        "classified_by": "human",
+        **{key: value for key, value in data.items() if key != "notes"},
+    }
+    if notes:  # a truthy note goes last; a missing or empty one is omitted
+        body["notes"] = notes
+    return {"repo_classification": body}
+
+
+def main() -> int:
+    """Write, validate and commit every reviewed classification.
+
+    Returns 0 on success, or 1 as soon as the validator rejects a file.
+    """
+    classification = ".repo-classification.yaml"
+    updated: list[str] = []
+    for slug, data in HUMAN_REVIEWS.items():
+        repo_path = HOME / slug
+        target = repo_path / classification
+        if not repo_path.is_dir():
+            print(f"skip {slug}: no checkout", file=sys.stderr)
+            continue
+        payload = build_block(slug, dict(data))
+        # Notes contain non-ASCII characters (§, →, —): pin UTF-8 instead of the locale default.
+        target.write_text(yaml.dump(payload, sort_keys=False, allow_unicode=True), encoding="utf-8")
+        proc = subprocess.run([sys.executable, str(VALIDATOR), str(target)], capture_output=True, text=True)
+        if proc.returncode != 0:
+            print(proc.stdout, proc.stderr, file=sys.stderr)
+            return 1
+        subprocess.run(["git", "add", classification], cwd=repo_path, check=True)
+        # Scope the staged-diff check and the commit to this one file so unrelated
+        # changes already in the index are not swept into the review commit.
+        diff = subprocess.run(["git", "diff", "--cached", "--quiet", "--", classification], cwd=repo_path)
+        if diff.returncode != 0:
+            message = "Human-review .repo-classification.yaml (CUST-WP-0050 follow-up)"
+            subprocess.run(["git", "commit", "-m", message, "--", classification], cwd=repo_path, check=True)
+        updated.append(slug)
+    print(f"Human-reviewed: {len(updated)}")
+    for slug in updated:
+        print(f"  ✓ {slug}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
\ No newline at end of file
diff --git a/tools/push_repo_classifications.sh b/tools/push_repo_classifications.sh
new file mode 100755
index 0000000..32c5f0e
--- /dev/null
+++ b/tools/push_repo_classifications.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Push every commit that is ahead of upstream in each $HOME/*/ git checkout that
+# contains a .repo-classification.yaml. Exits 1 if any push fails.
+set -euo pipefail
+
+HOME_REPOS="${HOME}"
+PUSHED=0
+SKIPPED=0
+FAILED=0
+FAILED_NAMES=()
+
+for dir in "$HOME_REPOS"/*/; do
+  [ -d "$dir/.git" ] || continue
+  [ -f "$dir/.repo-classification.yaml" ] || continue
+
+  name=$(basename "$dir")
+  cd "$dir"
+  # Unborn HEAD (no commits yet): nothing to push, and not counted as skipped.
+  branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || continue
+  ahead=$(git rev-list --count "@{u}..HEAD" 2>/dev/null || echo "no-upstream")
+
+  if [ "$ahead" = "no-upstream" ] || [ "$ahead" = "0" ]; then
+    SKIPPED=$((SKIPPED + 1))
+    continue
+  fi
+
+  # Push to the upstream that $ahead was measured against (not necessarily origin/$branch).
+  remote=$(git config --get "branch.$branch.remote")
+  merge_ref=$(git config --get "branch.$branch.merge")
+  echo "Pushing $name ($branch, $ahead commit(s))..."
+  if git push "$remote" "HEAD:$merge_ref" 2>&1; then
+    PUSHED=$((PUSHED + 1))
+  else
+    FAILED=$((FAILED + 1))
+    FAILED_NAMES+=("$name")
+  fi
+done
+
+echo ""
+echo "Summary: pushed=$PUSHED skipped=$SKIPPED failed=$FAILED"
+if [ "${#FAILED_NAMES[@]}" -gt 0 ]; then
+  echo "Failed:"
+  printf '  %s\n' "${FAILED_NAMES[@]}"
+  exit 1
+fi
\ No newline at end of file