feat(classification-spine): implement STATE-WP-0065 repo-anchored model

Replace the ad-hoc coordination-domain spine with the Repo Classification
Standard: 14 market domains, classification columns on managed_repos, and
workplans anchored by repo_id (topic_id optional).

- Add Alembic migration d8e9f0a1b2c3 with data backfill and workstream→workplan rename
- Add api/classification.py validation and register-from-classification tooling
- Expose workplan-first REST/MCP surface with legacy workstream aliases
- Add C-24 consistency rule and legacy domain frontmatter mapping
- Update dashboard repos page with category/capability/stake filters
- Update orientation docs; mark STATE-WP-0065 finished
This commit is contained in:
2026-06-22 13:52:13 +02:00
parent 279be4ffbd
commit 0949d4c0d8
84 changed files with 4494 additions and 1111 deletions

View File

@@ -26,6 +26,7 @@ Checks:
C-20 workstream-dependency-missing WARN Yes Workplan dependency frontmatter missing from DB graph
C-22 task-description-drift WARN Yes Task description/content differs between file and DB
C-23 workstream-active-task-planning-status WARN Yes Workstream/workplan is planning while a task is progress or wait
C-24 repo-classification-missing WARN No Registered repo lacks a valid .repo-classification.yaml on disk
Usage:
python scripts/consistency_check.py --repo SLUG [--fix] [--no-writeback] [--json] [--api-base URL]
@@ -42,7 +43,7 @@ Exit codes (--remote --all scheduled sweep):
1 — automation error: API unreachable, repo list fetch failed, C-00 on
any repo, or other infrastructure fault that prevented a full run
Assessment failures (C-01..C-23 except C-00) are repo hygiene gaps recorded
Assessment failures (C-01..C-24 except C-00) are repo hygiene gaps recorded
in the sweep report for later improvement. They do not fail the scheduler.
Agent/operator Make wrappers normalize exit code 2 to shell success while
@@ -78,6 +79,11 @@ from api.workplan_status import ( # noqa: E402
normalize_workstream_status as _normalize_workstream_status,
ready_review_status,
)
from api.classification import ( # noqa: E402
CLASSIFICATION_FILENAME,
load_classification_file,
resolve_topic_domain_slug,
)
from api.services.lifecycle import should_activate_parent_for_active_tasks # noqa: E402
from api.task_status import ( # noqa: E402
CANONICAL_TASK_STATUSES,
@@ -713,6 +719,31 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
repo_dir = Path(repo_path)
workplans_dir = repo_dir / "workplans"
repo_market_domain = str(repo.get("domain_slug") or "").strip()
# C-24: repo classification file missing or invalid (always WARN — migration rows too)
class_data, class_errors, class_warnings = load_classification_file(repo_dir)
if class_data is None:
classified_by = str(repo.get("classified_by") or "").strip()
if class_errors:
detail = "; ".join(class_errors)
else:
detail = f"{CLASSIFICATION_FILENAME} missing on disk"
if classified_by == "migration":
detail = f"{detail} (DB row is migration-derived — commit a human-reviewed file when ready)"
report.add(
severity="WARN",
check_id="C-24",
message=f"Repo classification gap: {detail}",
fixable=False,
)
for warning in class_warnings:
report.add(
severity="WARN",
check_id="C-24",
message=f"Repo classification advisory: {warning}",
fixable=False,
)
# C-01: workplans/ directory missing
if not workplans_dir.is_dir():
@@ -804,6 +835,7 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
"body": body,
"repo_id": repo_id,
"domain": file_domain,
"repo_market_domain": repo_market_domain,
},
)
continue
@@ -1708,6 +1740,7 @@ def fix_repo(
wp_file = Path(ctx["wp_file"])
meta = ctx["meta"]
domain = ctx["domain"]
repo_market_domain = str(ctx.get("repo_market_domain") or "").strip()
repo_id_val = ctx["repo_id"]
body = ctx.get("body", "")
wp_id = str(meta.get("id", "")).strip()
@@ -1717,17 +1750,23 @@ def fix_repo(
if status not in VALID_WP_STATUSES:
status = "active"
# Find topic_id for this domain
# Find topic_id — workplan frontmatter may still use legacy
# coordination slugs (e.g. custodian); map to market domain first.
topic_domain = resolve_topic_domain_slug(
domain,
repo_market_domain=repo_market_domain or None,
)
topics = _api_get(api_base, "/topics")
topic_id = None
if isinstance(topics, list):
for t in topics:
if t.get("domain_slug") == domain:
if t.get("domain_slug") == topic_domain:
topic_id = t["id"]
break
if topic_id is None:
report.fixes_applied.append(
f"C-06 SKIP {wp_id}: no topic found for domain '{domain}'"
f"C-06 SKIP {wp_id}: no topic found for domain "
f"'{topic_domain}' (workplan domain={domain!r})"
)
continue

View File

@@ -0,0 +1,635 @@
#!/usr/bin/env python3
"""Idempotent registration from committed ``.repo-classification.yaml`` (STATE-WP-0065 P3).
Reads classification from a repo checkout, validates against the canon allowed-values,
and upserts the ``managed_repos`` row (create or update classification + market domain).
Usage:
python scripts/register_from_classification.py --repo-path /path/to/repo [--dry-run]
python scripts/register_from_classification.py --slug state-hub [--dry-run]
python scripts/register_from_classification.py --bulk [--dry-run]
python scripts/register_from_classification.py --help
"""
from __future__ import annotations
import argparse
import asyncio
import json
import re
import socket
import subprocess
import sys
from dataclasses import dataclass, field
from datetime import date
from pathlib import Path
from typing import Any, Literal
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
from sqlalchemy import select # noqa: E402
from api.classification import ( # noqa: E402
CLASSIFICATION_FILENAME,
ClassificationData,
load_classification_file,
)
from api.config import settings # noqa: E402
from api.database import async_session_factory, engine # noqa: E402
from api.models.domain import Domain # noqa: E402
from api.models.managed_repo import ManagedRepo # noqa: E402
try:
import httpx
_HAS_HTTPX = True
except ImportError:
_HAS_HTTPX = False
# Closed set of per-repo outcomes used by the registration report.
Outcome = Literal["registered", "updated", "skipped", "invalid"]


@dataclass
class RowResult:
    """Result of processing a single repo during registration."""

    # Repo slug the row refers to.
    slug: str
    # Checkout path the row refers to (may be empty when no path was resolved).
    path: str
    # One of the ``Outcome`` literals.
    outcome: Outcome
    # Free-text explanation shown next to the outcome.
    detail: str = ""
    # Advisory messages rendered beneath the row in the text report.
    warnings: list[str] = field(default_factory=list)
@dataclass
class RegistrationReport:
    """Collects per-repo rows and renders them as text or a JSON-ready dict."""

    results: list[RowResult] = field(default_factory=list)

    def add(self, result: RowResult) -> None:
        """Append *result* to the report."""
        self.results.append(result)

    def counts(self) -> dict[str, int]:
        """Return a tally of rows per outcome; the four known outcomes are always present."""
        tally = dict.fromkeys(("registered", "updated", "skipped", "invalid"), 0)
        for entry in self.results:
            tally[entry.outcome] = tally.get(entry.outcome, 0) + 1
        return tally

    def render_text(self) -> str:
        """Return the human-readable report: one line per row, its warnings, then a summary."""
        out = ["register-from-classification report", ""]
        for entry in self.results:
            out.append(f" [{entry.outcome:10}] {entry.slug:30} {entry.detail}")
            out.extend(f" warn: {note}" for note in entry.warnings)
        tally = self.counts()
        summary = (
            "Summary: "
            f"registered={tally['registered']} "
            f"updated={tally['updated']} "
            f"skipped={tally['skipped']} "
            f"invalid={tally['invalid']}"
        )
        out.extend(["", summary])
        return "\n".join(out)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as plain dicts/lists (summary plus one dict per row)."""
        rows = []
        for entry in self.results:
            rows.append(
                {
                    "slug": entry.slug,
                    "path": entry.path,
                    "outcome": entry.outcome,
                    "detail": entry.detail,
                    "warnings": entry.warnings,
                }
            )
        return {"summary": self.counts(), "results": rows}
def _slugify(name: str) -> str:
slug = re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")
return slug or "repo"
def _parse_classified_at(value: str | None) -> date | None:
if not value:
return None
try:
return date.fromisoformat(str(value)[:10])
except ValueError:
return None
def _git_value(repo_path: Path, args: list[str]) -> str | None:
try:
return subprocess.check_output(
["git", *args],
cwd=repo_path,
stderr=subprocess.DEVNULL,
text=True,
).strip() or None
except (subprocess.CalledProcessError, FileNotFoundError, OSError):
return None
def _git_root(path: Path) -> Path:
    """Return the git toplevel for *path*, or the resolved *path* when git reports none."""
    toplevel = _git_value(path, ["rev-parse", "--show-toplevel"])
    if toplevel:
        return Path(toplevel)
    return path.resolve()
def _resolve_repo_path_for_host(repo: ManagedRepo) -> str | None:
hostname = socket.gethostname()
host_paths = repo.host_paths or {}
path = host_paths.get(hostname) or repo.local_path
if path and Path(path).is_dir():
return path
for candidate in host_paths.values():
if candidate and Path(candidate).is_dir():
return candidate
return None
def _classification_changed(repo: ManagedRepo, data: ClassificationData, domain_id) -> bool:
    """Return True when *repo* differs from *data* in domain or any classification field.

    Empty list-like values on *data* are compared as ``None``.
    """
    if repo.domain_id != domain_id:
        return True
    expected = {
        "category": data.category,
        "secondary_domains": data.secondary_domains or None,
        "capability_tags": data.capability_tags or None,
        "business_stake": data.business_stake or None,
        "business_mechanics": data.business_mechanics or None,
        "classified_at": _parse_classified_at(data.classified_at),
        "classified_by": data.classified_by,
        "standard_version": data.standard_version,
    }
    return any(getattr(repo, name) != wanted for name, wanted in expected.items())
def _apply_classification(repo: ManagedRepo, data: ClassificationData, domain_id) -> None:
    """Copy the domain and classification fields from *data* onto *repo*.

    Empty list-like values are stored as ``None``; ``classified_at`` is parsed
    into a date first.
    """
    assignments = {
        "domain_id": domain_id,
        "category": data.category,
        "secondary_domains": data.secondary_domains or None,
        "capability_tags": data.capability_tags or None,
        "business_stake": data.business_stake or None,
        "business_mechanics": data.business_mechanics or None,
        "classified_at": _parse_classified_at(data.classified_at),
        "classified_by": data.classified_by,
        "standard_version": data.standard_version,
    }
    for name, value in assignments.items():
        setattr(repo, name, value)
async def _get_domain_id(session, market_slug: str):
    """Return the id of the ``Domain`` row whose slug is *market_slug*.

    Raises:
        ValueError: no such domain row exists.
    """
    query = select(Domain).where(Domain.slug == market_slug)
    row = (await session.execute(query)).scalar_one_or_none()
    if row is not None:
        return row.id
    raise ValueError(f"Market domain '{market_slug}' not found in domains table")
async def _get_repo_by_slug(session, slug: str) -> ManagedRepo | None:
    """Return the ``ManagedRepo`` row with the given *slug*, or ``None``."""
    query = select(ManagedRepo).where(ManagedRepo.slug == slug)
    found = await session.execute(query)
    return found.scalar_one_or_none()
def _api_request(
    method: str,
    path: str,
    *,
    api_base: str,
    body: dict | None = None,
) -> tuple[int, Any]:
    """Send one JSON request and return ``(status_code, payload)``.

    Status ``0`` signals a transport-level failure (or httpx not installed),
    with the reason under ``"_error"``. A 204 yields a ``None`` payload; a
    body that cannot be decoded as JSON is returned under ``"_raw"``.
    """
    if not _HAS_HTTPX:
        return (0, {"_error": "httpx not installed"})
    target = f"{api_base.rstrip('/')}{path}"
    try:
        with httpx.Client(timeout=30.0) as client:
            reply = client.request(method, target, json=body)
            code = reply.status_code
            if code == 204:
                return code, None
            try:
                decoded = reply.json()
            except Exception:
                # Non-JSON body: hand the raw text back instead of failing.
                decoded = {"_raw": reply.text}
            return code, decoded
    except httpx.HTTPError as exc:
        return (0, {"_error": str(exc)})
async def _upsert_via_db(
    *,
    slug: str,
    repo_path: Path,
    data: ClassificationData,
    dry_run: bool,
    report: RegistrationReport,
) -> None:
    """Create or update the ``managed_repos`` row for *slug* through a DB session.

    Exactly one ``RowResult`` is added to *report*:

    * ``invalid`` — the market domain in *data* is not in the domains table
      (reported as ``skipped`` in a dry-run).
    * ``registered`` — no row existed; one is created (or would be, in a dry-run).
    * ``skipped`` — the classification is already current; checkout paths are
      refreshed when ``local_path`` differs from the current git root.
    * ``updated`` — the classification differed and was applied (or would be).

    Nothing is committed when *dry_run* is true.
    """
    git_root = _git_root(repo_path)
    checkout = str(git_root)
    remote_url = _git_value(git_root, ["remote", "get-url", "origin"])
    git_fingerprint = _git_value(git_root, ["rev-list", "--max-parents=0", "HEAD"])
    hostname = socket.gethostname()
    display_name = git_root.name.replace("-", " ").replace("_", " ").title()

    def _record(outcome: Outcome, detail: str) -> None:
        report.add(RowResult(slug, checkout, outcome, detail))

    async with async_session_factory() as session:
        try:
            domain_id = await _get_domain_id(session, data.domain)
        except ValueError as exc:
            # Unknown market domain: a real run is invalid, a dry-run only notes it.
            if dry_run:
                _record("skipped", f"dry-run: {exc}")
            else:
                _record("invalid", str(exc))
            return

        repo = await _get_repo_by_slug(session, slug)
        if repo is None:
            if dry_run:
                _record(
                    "registered",
                    f"would create repo under domain '{data.domain}' (dry-run)",
                )
                return
            repo = ManagedRepo(
                domain_id=domain_id,
                slug=slug,
                name=display_name,
                local_path=checkout,
                host_paths={hostname: checkout},
                remote_url=remote_url,
                git_fingerprint=git_fingerprint,
                status="active",
            )
            _apply_classification(repo, data, domain_id)
            session.add(repo)
            await session.commit()
            _record("registered", f"domain={data.domain}")
            return

        if not _classification_changed(repo, data, domain_id):
            if repo.local_path == checkout:
                _record("skipped", "classification already current")
                return
            if dry_run:
                _record(
                    "skipped",
                    "classification unchanged; would refresh local_path (dry-run)",
                )
                return
            _refresh_checkout_metadata(repo, checkout, hostname, remote_url, git_fingerprint)
            await session.commit()
            _record("skipped", "paths refreshed only")
            return

        if dry_run:
            _record(
                "updated",
                f"would update classification (domain={data.domain}) (dry-run)",
            )
            return
        _apply_classification(repo, data, domain_id)
        _refresh_checkout_metadata(repo, checkout, hostname, remote_url, git_fingerprint)
        await session.commit()
        _record("updated", f"domain={data.domain}")


def _refresh_checkout_metadata(
    repo: ManagedRepo,
    checkout: str,
    hostname: str,
    remote_url: str | None,
    git_fingerprint: str | None,
) -> None:
    """Point *repo* at *checkout* for this host and refresh git metadata when known."""
    repo.local_path = checkout
    # Re-assign a fresh dict rather than mutating the stored one in place.
    host_paths = dict(repo.host_paths or {})
    host_paths[hostname] = checkout
    repo.host_paths = host_paths
    # Keep the stored values when git could not report them.
    if remote_url:
        repo.remote_url = remote_url
    if git_fingerprint:
        repo.git_fingerprint = git_fingerprint
async def _upsert_via_api(
    *,
    slug: str,
    repo_path: Path,
    data: ClassificationData,
    dry_run: bool,
    api_base: str,
    report: RegistrationReport,
) -> None:
    """Create or update the repo *slug* through the REST API.

    Issues ``GET /repos/{slug}`` first. A 404 leads to ``POST /repos/``
    followed by a classification ``PATCH``; an existing repo gets a ``PATCH``
    and a ``POST /repos/{slug}/paths`` for this host. Exactly one
    ``RowResult`` is added to *report*. In a dry-run only the GET is sent.

    Only a 404 counts as "repo missing"; any other error status from the GET
    is reported as ``invalid`` instead of triggering a create attempt.
    """
    git_root = _git_root(repo_path)
    checkout = str(git_root)
    remote_url = _git_value(git_root, ["remote", "get-url", "origin"])
    git_fingerprint = _git_value(git_root, ["rev-list", "--max-parents=0", "HEAD"])
    hostname = socket.gethostname()
    display_name = git_root.name.replace("-", " ").replace("_", " ").title()

    def _detail_of(payload: Any) -> Any:
        return payload.get("detail", payload) if isinstance(payload, dict) else payload

    status, existing = _api_request("GET", f"/repos/{slug}", api_base=api_base)
    if status == 0:
        reason = existing.get("_error", existing) if isinstance(existing, dict) else existing
        report.add(RowResult(slug, checkout, "invalid", f"API unreachable: {reason}"))
        return
    if status == 404:
        existing = None
    elif status >= 400:
        # Auth/validation/server errors must not be mistaken for "not registered".
        report.add(
            RowResult(slug, checkout, "invalid", f"GET failed: {_detail_of(existing)}")
        )
        return

    patch_body = {
        "category": data.category,
        "secondary_domains": data.secondary_domains,
        "capability_tags": data.capability_tags,
        "business_stake": data.business_stake,
        "business_mechanics": data.business_mechanics,
        "classified_at": data.classified_at,
        "classified_by": data.classified_by,
        "standard_version": data.standard_version,
        "domain_slug": data.domain,
        "local_path": checkout,
    }
    # Match the DB path: only overwrite git metadata when git reported a value,
    # so an unreadable remote does not null out what is already stored.
    if remote_url:
        patch_body["remote_url"] = remote_url
    if git_fingerprint:
        patch_body["git_fingerprint"] = git_fingerprint

    if existing is None:
        if dry_run:
            report.add(
                RowResult(
                    slug,
                    checkout,
                    "registered",
                    f"would POST /repos/ domain={data.domain} (dry-run)",
                )
            )
            return
        post_body = {
            "domain_slug": data.domain,
            "slug": slug,
            "name": display_name,
            "local_path": checkout,
            "host_paths": {hostname: checkout},
            "remote_url": remote_url,
            "git_fingerprint": git_fingerprint,
        }
        code, created = _api_request("POST", "/repos/", api_base=api_base, body=post_body)
        if code not in (200, 201):
            report.add(
                RowResult(slug, checkout, "invalid", f"POST failed: {_detail_of(created)}")
            )
            return
        code, updated = _api_request(
            "PATCH", f"/repos/{slug}", api_base=api_base, body=patch_body
        )
        if code != 200:
            report.add(
                RowResult(
                    slug,
                    checkout,
                    "invalid",
                    f"created repo but classification PATCH failed: {_detail_of(updated)}",
                )
            )
            return
        report.add(RowResult(slug, checkout, "registered", f"domain={data.domain}"))
        return

    if dry_run:
        report.add(
            RowResult(
                slug,
                checkout,
                "updated",
                f"would PATCH /repos/{slug} domain={data.domain} (dry-run)",
            )
        )
        return
    code, updated = _api_request(
        "PATCH", f"/repos/{slug}", api_base=api_base, body=patch_body
    )
    if code != 200:
        report.add(
            RowResult(slug, checkout, "invalid", f"PATCH failed: {_detail_of(updated)}")
        )
        return

    path_code, path_payload = _api_request(
        "POST",
        f"/repos/{slug}/paths",
        api_base=api_base,
        body={"host": hostname, "path": checkout},
    )
    # The classification update succeeded, so a failed path registration is
    # surfaced as a warning rather than changing the outcome.
    notes: list[str] = []
    if path_code not in (200, 201, 204):
        notes.append(f"host path registration failed: {_detail_of(path_payload)}")
    report.add(
        RowResult(slug, checkout, "updated", f"domain={data.domain}", warnings=notes)
    )
async def register_one(
    *,
    slug: str,
    repo_path: Path,
    dry_run: bool = False,
    use_api: bool = False,
    api_base: str | None = None,
    report: RegistrationReport | None = None,
) -> RowResult:
    """Register or update a single repo from its classification file.

    Loads the classification from the git root of *repo_path*. When loading
    yields no data the row is ``invalid`` and carries the loader's errors and
    warnings. Otherwise the upsert runs via the REST API (*use_api*) or a
    direct DB session, and any loader warnings are attached to the resulting
    row so they show up in the report.

    Returns the row that was added to *report* (a fresh report is used when
    none is given).
    """
    if report is None:
        report = RegistrationReport()
    git_root = _git_root(repo_path)
    data, errors, warnings = load_classification_file(git_root)
    if data is None:
        result = RowResult(
            slug,
            str(git_root),
            "invalid",
            "; ".join(errors) or "classification invalid",
            warnings=warnings,
        )
        report.add(result)
        return result

    if use_api:
        await _upsert_via_api(
            slug=slug,
            repo_path=git_root,
            data=data,
            dry_run=dry_run,
            api_base=api_base or settings.api_base,
            report=report,
        )
    else:
        await _upsert_via_db(
            slug=slug,
            repo_path=git_root,
            data=data,
            dry_run=dry_run,
            report=report,
        )

    result = report.results[-1]
    # Advisory warnings on a valid file used to be dropped; keep them on the row.
    if warnings:
        result.warnings.extend(warnings)
    return result
async def _bulk_targets(session) -> list[tuple[str, str]]:
    """Return ``(slug, path)`` for every active repo with a resolvable checkout path.

    Repos are ordered by slug; those without an accessible path are left out.
    """
    query = (
        select(ManagedRepo)
        .where(ManagedRepo.status == "active")
        .order_by(ManagedRepo.slug)
    )
    active_repos = (await session.execute(query)).scalars().all()
    resolved = ((repo.slug, _resolve_repo_path_for_host(repo)) for repo in active_repos)
    return [(slug, path) for slug, path in resolved if path]
async def run_registration(args: argparse.Namespace) -> RegistrationReport:
    """Run registration for the mode selected in *args* and return the report.

    Modes, checked in this order: ``--bulk`` (all active repos with an
    accessible path), ``--repo-path`` (slug from ``--slug`` or derived from
    the git root name), ``--slug`` (path looked up in the DB).

    Raises:
        SystemExit: none of the three modes was requested.
    """
    report = RegistrationReport()
    use_api = args.api and not args.db

    async def _register(slug: str, checkout: Path) -> None:
        await register_one(
            slug=slug,
            repo_path=checkout,
            dry_run=args.dry_run,
            use_api=use_api,
            api_base=args.api_base,
            report=report,
        )

    if args.bulk:
        async with async_session_factory() as session:
            targets = await _bulk_targets(session)
        if targets:
            for slug, path in targets:
                await _register(slug, Path(path))
        else:
            report.add(
                RowResult("(bulk)", "", "skipped", "no active repos with accessible local paths")
            )
        return report

    if args.repo_path:
        repo_path = Path(args.repo_path).expanduser().resolve()
        slug = args.slug or _slugify(_git_root(repo_path).name)
        await _register(slug, repo_path)
        return report

    if not args.slug:
        raise SystemExit("Specify --repo-path PATH, --slug SLUG, or --bulk")

    async with async_session_factory() as session:
        repo = await _get_repo_by_slug(session, args.slug)
    if repo is None:
        report.add(RowResult(args.slug, "", "invalid", "repo slug not found in DB"))
        return report
    path = _resolve_repo_path_for_host(repo)
    if not path:
        report.add(
            RowResult(
                args.slug,
                "",
                "invalid",
                "no accessible local path (local_path / host_paths)",
            )
        )
        return report
    await _register(args.slug, Path(path))
    return report
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the registration script."""
    parser = argparse.ArgumentParser(
        description="Register or update managed_repos from .repo-classification.yaml",
    )
    add = parser.add_argument

    add("--repo-path", metavar="PATH", help="Local git checkout path")
    add(
        "--slug",
        metavar="SLUG",
        help="Registered repo slug (required with --bulk omitted unless --repo-path given)",
    )

    # Plain boolean switches, declared in the order they appear in --help.
    switches = (
        ("--bulk", "All active registered repos with accessible local paths"),
        ("--dry-run", "Report actions without writing to DB/API"),
        ("--api", "Upsert via REST API (default: direct DB session)"),
        ("--db", "Force direct DB session (overrides --api)"),
    )
    for flag, help_text in switches:
        add(flag, action="store_true", help=help_text)

    add(
        "--api-base",
        default=settings.api_base,
        help=f"State Hub API base URL (default: {settings.api_base})",
    )
    add("--json", action="store_true", help="Emit JSON report")
    return parser
def main(argv: list[str] | None = None) -> int:
    """Parse arguments, run the registration, print the report.

    Returns 1 when any row is ``invalid``, otherwise 0. Argument errors exit
    through ``parser.error``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if args.bulk and args.repo_path:
        parser.error("--bulk cannot be combined with --repo-path")
    if not (args.bulk or args.repo_path or args.slug):
        parser.error("Specify one of --repo-path PATH, --slug SLUG, or --bulk")

    async def _run() -> RegistrationReport:
        # Dispose the async engine inside the running loop so pooled
        # connections are closed before asyncio.run() tears the loop down.
        try:
            return await run_registration(args)
        finally:
            await engine.dispose()

    report = asyncio.run(_run())
    if args.json:
        print(json.dumps(report.to_dict(), indent=2))
    else:
        print(report.render_text())
    return 1 if report.counts()["invalid"] else 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,334 @@
"""Constants and mappings for STATE-WP-0065 P1 spine migration.
Shared by the Alembic revision and the dry-run report script.
"""
from __future__ import annotations
import uuid
from datetime import date
# Deterministic UUIDs for market-domain rows (stable across runs).
# This value is the uuid5 namespace used by market_domain_uuid() and
# old_domain_uuid() in this module.
_MARKET_DOMAIN_NAMESPACE = uuid.UUID("8dc7d106-11e2-41df-b512-89ed69d2a65f")
# 14 fixed market domains from Repo Classification Standard v1.0 §6.
# Each entry is a (slug, display name) pair.
MARKET_DOMAINS: list[tuple[str, str]] = [
    ("infotech", "Infotech"),
    ("financials", "Financials"),
    ("communication", "Communication"),
    ("consumer", "Consumer"),
    ("health", "Health"),
    ("industrials", "Industrials"),
    ("energy", "Energy"),
    ("utilities", "Utilities"),
    ("materials", "Materials"),
    ("realestate", "Real Estate"),
    ("crypto", "Crypto"),
    ("agents", "Agents"),
    ("space", "Space"),
    ("government", "Government"),
]
# Old coordination-domain slugs (pre-migration ``domains`` table) → market domain.
# Order matters: the six canonical seeds come first so they win in the reverse map.
OLD_DOMAIN_TO_MARKET: dict[str, str] = {
    "custodian": "infotech",
    "railiance": "financials",
    "markitect": "communication",
    "coulomb_social": "communication",
    "personhood": "government",
    "foerster_capabilities": "agents",
    # Extended coordination domains (beyond the original 6 canonical seeds).
    "capabilities": "agents",
    "canon": "infotech",
    "citation_evidence": "infotech",
    "helix_forge": "infotech",
    "inter_hub": "infotech",
    "netkingdom": "communication",
    "stack": "infotech",
    "vergabe_teilnahme": "government",
    "whynot": "consumer",
    "test_domain_v2": "infotech",
}
# Best-effort reverse map for downgrade (lossy: many old slugs → one market domain).
# Iterating in reverse makes the FIRST old slug listed for each market win, so
# a market resolves to its canonical seed (e.g. infotech → custodian) rather
# than to whichever extended slug happens to be listed last.
MARKET_TO_OLD_DOMAIN: dict[str, str] = {
    market: old
    for old, market in reversed(list(OLD_DOMAIN_TO_MARKET.items()))
}
# Legacy coordination domains restored on downgrade.
# Each entry is a (slug, display name) pair; the slugs are the first six keys
# of OLD_DOMAIN_TO_MARKET.
OLD_COORDINATION_DOMAINS: list[tuple[str, str]] = [
    ("custodian", "The Custodian"),
    ("railiance", "Railiance"),
    ("markitect", "Markitect"),
    ("coulomb_social", "Coulomb.social"),
    ("personhood", "Personhood"),
    ("foerster_capabilities", "Foerster Capabilities"),
]
# Human-reviewed classifications for the 11 custodian-domain fixture repos.
# Keyed by repo slug. Every entry carries the same six keys: "category",
# "domain" (a market-domain slug), "secondary_domains", "capability_tags",
# "business_stake" and "business_mechanics". derive_classification() returns
# these entries for matching slugs and defaults "classified_by" to "human".
REPO_CLASSIFICATIONS: dict[str, dict] = {
    "the-custodian": {
        "category": "research",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "governance",
            "knowledge",
            "coordination",
            "policy",
            "documentation",
        ],
        "business_stake": ["technology", "operations", "intelligence", "execution"],
        "business_mechanics": ["intention", "control", "coordination", "adaptation"],
    },
    "inter-hub": {
        "category": "research",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "governance",
            "observability",
            "platform",
            "coordination",
            "orchestration",
        ],
        "business_stake": ["technology", "intelligence", "operations"],
        "business_mechanics": ["control", "coordination", "adaptation"],
    },
    "state-hub": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "coordination",
            "knowledge",
            "platform",
            "observability",
            "governance",
        ],
        "business_stake": [
            "technology",
            "operations",
            "product",
            "intelligence",
            "automation",
        ],
        "business_mechanics": ["coordination", "control", "operation", "adaptation"],
    },
    "hub-core": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": [],
        "capability_tags": ["platform", "configuration", "orchestration"],
        "business_stake": ["technology", "execution", "product"],
        "business_mechanics": ["operation"],
    },
    "activity-core": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "workflow",
            "orchestration",
            "automation",
            "coordination",
            "observability",
        ],
        "business_stake": ["technology", "operations", "automation", "execution"],
        "business_mechanics": ["coordination", "operation", "adaptation"],
    },
    "issue-core": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "workflow",
            "coordination",
            "orchestration",
            "traceability",
        ],
        "business_stake": ["technology", "product", "operations", "automation"],
        "business_mechanics": ["coordination", "operation"],
    },
    "kaizen-agentic": {
        "category": "tooling",
        "domain": "agents",
        "secondary_domains": ["infotech"],
        "capability_tags": [
            "orchestration",
            "automation",
            "coordination",
            "knowledge",
            "documentation",
        ],
        "business_stake": [
            "technology",
            "product",
            "automation",
            "people",
            "intelligence",
        ],
        "business_mechanics": [
            "intention",
            "coordination",
            "operation",
            "adaptation",
        ],
    },
    "llm-connect": {
        "category": "tooling",
        "domain": "agents",
        "secondary_domains": ["infotech"],
        "capability_tags": [
            "orchestration",
            "model-routing",
            "configuration",
            "automation",
        ],
        "business_stake": ["technology", "product", "automation"],
        "business_mechanics": ["operation", "adaptation"],
    },
    "ops-bridge": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": [],
        "capability_tags": [
            "operations",
            "access-control",
            "platform",
            "observability",
            "orchestration",
        ],
        "business_stake": ["operations", "technology", "automation"],
        "business_mechanics": ["control", "operation", "adaptation"],
    },
    "ops-warden": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": [],
        "capability_tags": [
            "identity",
            "access-control",
            "security",
            "policy",
            "audit",
            "governance",
        ],
        "business_stake": ["technology", "operations", "legal", "automation"],
        "business_mechanics": ["control", "operation"],
    },
    "email-connect": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["communication"],
        "capability_tags": [
            "evidence",
            "traceability",
            "source-management",
            "automation",
        ],
        "business_stake": ["technology", "operations", "legal"],
        "business_mechanics": ["operation", "coordination"],
    },
}
# Repo discrepancy resolution (STATE-WP-0065 §P1 data migration).
# Keyed by repo slug. Every entry has an "action"; "target_slug" and "archive"
# are present only on some entries. The dry-run report prints these values as
# found here.
REPO_DISPOSITIONS: dict[str, dict] = {
    "markitect-project": {
        "action": "relink_to",
        "target_slug": "markitect-main",
        "archive": True,
    },
    "railiance-bootstrap": {
        "action": "archive",
    },
    "railiance-hosts": {
        "action": "archive",
    },
    "vergabe_teilnahme": {
        "action": "collapse_into",
        "target_slug": "vergabe-teilnahme",
        "archive": True,
    },
}
# Fallback repo slug for orphan workplans after backfill.
FALLBACK_REPO_SLUG = "state-hub"
# Version string written by migration_provenance() as "standard_version".
STANDARD_VERSION = "1.0"
def market_domain_uuid(slug: str) -> str:
    """Return the stable uuid5 string for the market-domain *slug*."""
    seed = f"state-hub.market-domain.{slug}"
    derived = uuid.uuid5(_MARKET_DOMAIN_NAMESPACE, seed)
    return str(derived)
def old_domain_uuid(slug: str) -> str:
    """Return the stable uuid5 string for the legacy coordination-domain *slug*."""
    seed = f"state-hub.coordination-domain.{slug}"
    derived = uuid.uuid5(_MARKET_DOMAIN_NAMESPACE, seed)
    return str(derived)
def derive_classification(repo_slug: str, old_domain_slug: str | None) -> dict:
    """Return a classification dict for *repo_slug*.

    Uses committed ``REPO_CLASSIFICATIONS`` when present (``classified_by``
    defaults to ``"human"``); otherwise derives a migration-time
    classification from the old coordination domain, with
    ``classified_by="migration"``. Unmapped or missing old domains fall back
    to market domain ``"infotech"`` and category ``"project"`` with empty
    lists.

    The returned dict and its lists are always fresh objects, so callers may
    modify them without affecting ``REPO_CLASSIFICATIONS``.
    """
    fixture = REPO_CLASSIFICATIONS.get(repo_slug)
    if fixture is not None:
        # Copy list values too: a plain dict() copy would hand out the
        # module-level lists themselves.
        data = {
            key: list(value) if isinstance(value, list) else value
            for key, value in fixture.items()
        }
        data.setdefault("classified_by", "human")
        return data

    market = OLD_DOMAIN_TO_MARKET.get(old_domain_slug or "", "infotech")

    # Domain-specific heuristics for repos without committed classification files:
    # old slug → (category, capability_tags, business_stake, business_mechanics).
    # Built per call so every result gets its own list objects.
    heuristics: dict[str, tuple[str, list[str], list[str], list[str]]] = {
        "custodian": ("tooling", ["platform"], ["technology", "operations"], []),
        "railiance": (
            "project",
            ["platform", "operations"],
            ["technology", "operations"],
            [],
        ),
        "markitect": (
            "project",
            ["knowledge", "documentation"],
            ["technology", "product"],
            [],
        ),
        "coulomb_social": (
            "experimental",
            ["marketplace", "collaboration"],
            ["product", "sales"],
            [],
        ),
        "personhood": (
            "research",
            ["governance", "policy"],
            ["legal", "technology", "intelligence"],
            ["intention", "control"],
        ),
        "foerster_capabilities": (
            "research",
            ["knowledge"],
            ["intelligence", "technology"],
            [],
        ),
    }
    category, capability_tags, business_stake, business_mechanics = heuristics.get(
        old_domain_slug or "", ("project", [], [], [])
    )
    secondary_domains: list[str] = []

    return {
        "category": category,
        "domain": market,
        "secondary_domains": secondary_domains,
        "capability_tags": capability_tags,
        "business_stake": business_stake,
        "business_mechanics": business_mechanics,
        "classified_by": "migration",
    }
def migration_provenance() -> dict:
    """Provenance fields applied during Alembic backfill.

    ``classified_at`` is today's date in ISO format.
    """
    stamp = date.today().isoformat()
    provenance = {"classified_at": stamp}
    provenance["classified_by"] = "migration"
    provenance["standard_version"] = STANDARD_VERSION
    return provenance

View File

@@ -0,0 +1,206 @@
#!/usr/bin/env python3
"""Dry-run report for STATE-WP-0065 P1 spine migration.
Prints would-be classification, domain, repo-disposition, and workplan-anchor
changes without applying them. Requires a live PostgreSQL connection (same
DATABASE_URL as the API).
"""
from __future__ import annotations
import asyncio
import sys
from pathlib import Path
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
from sqlalchemy import text # noqa: E402
from sqlalchemy.ext.asyncio import AsyncSession # noqa: E402
from api.database import async_session_factory, engine # noqa: E402
from scripts.spine_migration_data import ( # noqa: E402
FALLBACK_REPO_SLUG,
MARKET_DOMAINS,
OLD_DOMAIN_TO_MARKET,
REPO_CLASSIFICATIONS,
REPO_DISPOSITIONS,
derive_classification,
market_domain_uuid,
)
def _section(title: str) -> None:
print()
print("=" * 72)
print(title)
print("=" * 72)
async def _report_domains(session: AsyncSession) -> None:
    """Print current ``domains`` rows with their market mapping, then the market domains to insert."""
    _section("Domain spine replacement")
    query = text("SELECT slug, name FROM domains ORDER BY slug")
    existing_rows = (await session.execute(query)).fetchall()
    known_slugs = {row[0] for row in existing_rows}

    print(f"Current domains ({len(existing_rows)}):")
    for slug, _name in existing_rows:
        target = OLD_DOMAIN_TO_MARKET.get(slug, "(no mapping — would delete)")
        print(f" {slug:25}{target}")

    print(f"\nMarket domains to insert ({len(MARKET_DOMAINS)}):")
    for slug, label in MARKET_DOMAINS:
        marker = "NEW" if slug not in known_slugs else "exists"
        print(f" [{marker:5}] {slug:20} {label:20} id={market_domain_uuid(slug)}")
async def _report_classifications(session: AsyncSession) -> None:
    """Print the would-be classification of every managed repo and a fixture/derived tally."""
    _section("Repo classification backfill")
    query = text(
        """
SELECT mr.slug, mr.status, d.slug AS old_domain
FROM managed_repos mr
JOIN domains d ON d.id = mr.domain_id
ORDER BY mr.slug
"""
    )
    rows = await session.execute(query)
    fixture_total = 0
    derived_total = 0
    for repo_slug, status, old_domain in rows:
        cls = derive_classification(repo_slug, old_domain)
        if repo_slug in REPO_CLASSIFICATIONS:
            source = "fixture"
            fixture_total += 1
        else:
            source = "derived"
            derived_total += 1
        print(
            f" {repo_slug:30} [{status:8}] "
            f"{old_domain:20}{cls['category']:12} · {cls['domain']:15} "
            f"({source}, by={cls.get('classified_by', 'migration')})"
        )
    print(f"\nSummary: {fixture_total} from REPO_CLASSIFICATIONS, {derived_total} derived")
async def _report_dispositions(session: AsyncSession) -> None:
    """Print each configured repo disposition and whether its repo row exists."""
    _section("Repo dispositions")
    if not REPO_DISPOSITIONS:
        print(" (none)")
        return
    lookup = text("SELECT 1 FROM managed_repos WHERE slug = :slug")
    for slug, disposition in REPO_DISPOSITIONS.items():
        hit = (await session.execute(lookup, {"slug": slug})).fetchone()
        presence = "MISSING" if not hit else "found"
        print(f" {slug:25} [{presence}] action={disposition['action']}")
        if disposition.get("target_slug"):
            print(f" target: {disposition['target_slug']}")
        if disposition.get("archive"):
            print(" would archive phantom/duplicate row")
async def _report_workplan_anchors(session: AsyncSession) -> None:
    """Print, per workstream, whether it already has a ``repo_id`` or still needs an anchor."""
    _section("Workplan repo_id backfill (would-be)")
    query = text(
        """
SELECT ws.slug, ws.repo_id, t.slug AS topic_slug, d.slug AS domain_slug,
mr.slug AS current_repo
FROM workstreams ws
LEFT JOIN topics t ON t.id = ws.topic_id
LEFT JOIN domains d ON d.id = t.domain_id
LEFT JOIN managed_repos mr ON mr.id = ws.repo_id
ORDER BY ws.slug
"""
    )
    rows = await session.execute(query)
    unanchored = 0
    for ws_slug, repo_id, topic_slug, domain_slug, current_repo in rows:
        if repo_id is not None:
            print(f" ok {ws_slug:40} repo={current_repo}")
            continue
        unanchored += 1
        print(
            f" NEEDS ANCHOR {ws_slug:40} topic={topic_slug or '-':20} "
            f"domain={domain_slug or '-'}"
        )
    print(f"\nWorkstreams with NULL repo_id: {unanchored}")
    if unanchored:
        print(f"Orphans would fall back to: {FALLBACK_REPO_SLUG}")
async def _report_topic_domain_updates(session: AsyncSession) -> None:
    """Print, per old domain slug with any topics or domain goals, the market slug and both counts."""
    _section("Topic / domain_goal domain_id remapping")
    topics_query = text(
        """
SELECT COUNT(*) FROM topics t
JOIN domains d ON d.id = t.domain_id
WHERE d.slug = :old_slug
"""
    )
    goals_query = text(
        """
SELECT COUNT(*) FROM domain_goals dg
JOIN domains d ON d.id = dg.domain_id
WHERE d.slug = :old_slug
"""
    )
    for old_slug, market_slug in OLD_DOMAIN_TO_MARKET.items():
        params = {"old_slug": old_slug}
        topic_result = await session.execute(topics_query, params)
        goal_result = await session.execute(goals_query, params)
        tc = topic_result.scalar_one()
        gc = goal_result.scalar_one()
        if not (tc or gc):
            continue
        print(f" {old_slug:22}{market_slug:15} topics={tc} domain_goals={gc}")
async def _report_table_renames(session: AsyncSession) -> None:
    """Print the structural workstream → workplan renames.

    *session* is accepted for signature parity with the other report
    sections; nothing is queried here.
    """
    _section("Schema renames (structural)")
    fk_columns = [
        "tasks.workstream_id",
        "decisions.workstream_id",
        "progress_events.workstream_id",
        "token_events.workstream_id",
        "contributions.related_workstream_id",
        "extension_points.workstream_id",
        "technical_debt.workstream_id",
        "capability_requests.requesting_workstream_id",
        "capability_requests.fulfilling_workstream_id",
        "workplan_launch_requests.workstream_id",
    ]
    for column in fk_columns:
        renamed = column.replace("workstream", "workplan")
        # Separate old and new names like the table-level lines below do;
        # they were previously printed fused together.
        print(f" {column} → {renamed}")
    print(" workstreams → workplans")
    print(" workstream_dependencies → workplan_dependencies")
    print(" from_workstream_id → from_workplan_id")
    print(" to_workstream_id → to_workplan_id")
async def main() -> None:
    """Print every dry-run report section using one read-only session.

    The engine is disposed even when a section raises, so pooled connections
    are closed before the event loop shuts down.
    """
    print("STATE-WP-0065 P1 — Spine migration dry-run report")
    print("(read-only; no changes applied)")
    sections = (
        _report_domains,
        _report_classifications,
        _report_dispositions,
        _report_workplan_anchors,
        _report_topic_domain_updates,
        _report_table_renames,
    )
    try:
        async with async_session_factory() as session:
            for section in sections:
                await section(session)
    finally:
        await engine.dispose()
    print()
    print("Dry-run complete. Review the report before running:")
    print(" alembic upgrade d8e9f0a1b2c3")


if __name__ == "__main__":
    asyncio.run(main())