feat(classification-spine): implement STATE-WP-0065 repo-anchored model

Replace the ad-hoc coordination-domain spine with the Repo Classification Standard: 14 market domains, classification columns on managed_repos, and workplans anchored by repo_id (topic_id optional).

- Add Alembic migration d8e9f0a1b2c3 with data backfill and workstream→workplan rename
- Add api/classification.py validation and register-from-classification tooling
- Expose workplan-first REST/MCP surface with legacy workstream aliases
- Add C-24 consistency rule and legacy domain frontmatter mapping
- Update dashboard repos page with category/capability/stake filters
- Update orientation docs; mark STATE-WP-0065 finished
2026-06-22 13:52:13 +02:00
parent 279be4ffbd
commit 0949d4c0d8
84 changed files with 4494 additions and 1111 deletions
--- a/scripts/consistency_check.py
+++ b/scripts/consistency_check.py
@@ -26,6 +26,7 @@ Checks:
  C-20  workstream-dependency-missing WARN Yes Workplan dependency frontmatter missing from DB graph
  C-22  task-description-drift  WARN  Yes  Task description/content differs between file and DB
  C-23  workstream-active-task-planning-status WARN Yes Workstream/workplan is planning while a task is progress or wait
+  C-24  repo-classification-missing WARN  No   Registered repo lacks a valid .repo-classification.yaml on disk

 Usage:
    python scripts/consistency_check.py --repo SLUG [--fix] [--no-writeback] [--json] [--api-base URL]
@@ -42,7 +43,7 @@ Exit codes (--remote --all scheduled sweep):
    1 — automation error: API unreachable, repo list fetch failed, C-00 on
        any repo, or other infrastructure fault that prevented a full run

-Assessment failures (C-01..C-23 except C-00) are repo hygiene gaps recorded
+Assessment failures (C-01..C-24 except C-00) are repo hygiene gaps recorded
 in the sweep report for later improvement. They do not fail the scheduler.

 Agent/operator Make wrappers normalize exit code 2 to shell success while
@@ -78,6 +79,11 @@ from api.workplan_status import (  # noqa: E402
    normalize_workstream_status as _normalize_workstream_status,
    ready_review_status,
 )
+from api.classification import (  # noqa: E402
+    CLASSIFICATION_FILENAME,
+    load_classification_file,
+    resolve_topic_domain_slug,
+)
 from api.services.lifecycle import should_activate_parent_for_active_tasks  # noqa: E402
 from api.task_status import (  # noqa: E402
    CANONICAL_TASK_STATUSES,
@@ -713,6 +719,31 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N

    repo_dir = Path(repo_path)
    workplans_dir = repo_dir / "workplans"
+    repo_market_domain = str(repo.get("domain_slug") or "").strip()
+
+    # C-24: repo classification file missing or invalid (always WARN — migration rows too)
+    class_data, class_errors, class_warnings = load_classification_file(repo_dir)
+    if class_data is None:
+        classified_by = str(repo.get("classified_by") or "").strip()
+        if class_errors:
+            detail = "; ".join(class_errors)
+        else:
+            detail = f"{CLASSIFICATION_FILENAME} missing on disk"
+        if classified_by == "migration":
+            detail = f"{detail} (DB row is migration-derived — commit a human-reviewed file when ready)"
+        report.add(
+            severity="WARN",
+            check_id="C-24",
+            message=f"Repo classification gap: {detail}",
+            fixable=False,
+        )
+    for warning in class_warnings:
+        report.add(
+            severity="WARN",
+            check_id="C-24",
+            message=f"Repo classification advisory: {warning}",
+            fixable=False,
+        )

    # C-01: workplans/ directory missing
    if not workplans_dir.is_dir():
@@ -804,6 +835,7 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
                    "body": body,
                    "repo_id": repo_id,
                    "domain": file_domain,
+                    "repo_market_domain": repo_market_domain,
                },
            )
            continue
@@ -1708,6 +1740,7 @@ def fix_repo(
                wp_file = Path(ctx["wp_file"])
                meta = ctx["meta"]
                domain = ctx["domain"]
+                repo_market_domain = str(ctx.get("repo_market_domain") or "").strip()
                repo_id_val = ctx["repo_id"]
                body = ctx.get("body", "")
                wp_id = str(meta.get("id", "")).strip()
@@ -1717,17 +1750,23 @@ def fix_repo(
                if status not in VALID_WP_STATUSES:
                    status = "active"

-                # Find topic_id for this domain
+                # Find topic_id — workplan frontmatter may still use legacy
+                # coordination slugs (e.g. custodian); map to market domain first.
+                topic_domain = resolve_topic_domain_slug(
+                    domain,
+                    repo_market_domain=repo_market_domain or None,
+                )
                topics = _api_get(api_base, "/topics")
                topic_id = None
                if isinstance(topics, list):
                    for t in topics:
-                        if t.get("domain_slug") == domain:
+                        if t.get("domain_slug") == topic_domain:
                            topic_id = t["id"]
                            break
                if topic_id is None:
                    report.fixes_applied.append(
-                        f"C-06 SKIP {wp_id}: no topic found for domain '{domain}'"
+                        f"C-06 SKIP {wp_id}: no topic found for domain "
+                        f"'{topic_domain}' (workplan domain={domain!r})"
                    )
                    continue

--- a/scripts/register_from_classification.py
+++ b/scripts/register_from_classification.py
@@ -0,0 +1,635 @@
+#!/usr/bin/env python3
+"""Idempotent registration from committed ``.repo-classification.yaml`` (STATE-WP-0065 P3).
+
+Reads classification from a repo checkout, validates against the canon allowed-values,
+and upserts the ``managed_repos`` row (create or update classification + market domain).
+
+Usage:
+    python scripts/register_from_classification.py --repo-path /path/to/repo [--dry-run]
+    python scripts/register_from_classification.py --slug state-hub [--dry-run]
+    python scripts/register_from_classification.py --bulk [--dry-run]
+    python scripts/register_from_classification.py --help
+"""
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import re
+import socket
+import subprocess
+import sys
+from dataclasses import dataclass, field
+from datetime import date
+from pathlib import Path
+from typing import Any, Literal
+
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+from sqlalchemy import select  # noqa: E402
+
+from api.classification import (  # noqa: E402
+    CLASSIFICATION_FILENAME,
+    ClassificationData,
+    load_classification_file,
+)
+from api.config import settings  # noqa: E402
+from api.database import async_session_factory, engine  # noqa: E402
+from api.models.domain import Domain  # noqa: E402
+from api.models.managed_repo import ManagedRepo  # noqa: E402
+
+try:
+    import httpx
+
+    _HAS_HTTPX = True
+except ImportError:
+    _HAS_HTTPX = False
+
+Outcome = Literal["registered", "updated", "skipped", "invalid"]
+
+
+@dataclass
+class RowResult:
+    """Result of processing a single repo, as listed in the registration report."""
+
+    # Repo slug the row refers to ("(bulk)" is used for the empty bulk run).
+    slug: str
+    # Git root of the checkout, or "" when no path could be resolved.
+    path: str
+    # One of the ``Outcome`` literals: registered / updated / skipped / invalid.
+    outcome: Outcome
+    # Human-readable explanation printed next to the outcome.
+    detail: str = ""
+    # Advisory messages; the text renderer prints each one under the row.
+    warnings: list[str] = field(default_factory=list)
+
+
+@dataclass
+class RegistrationReport:
+    """Collects ``RowResult`` rows and renders them as text or a JSON-ready dict."""
+
+    results: list[RowResult] = field(default_factory=list)
+
+    def add(self, result: RowResult) -> None:
+        """Record one more row at the end of the report."""
+        self.results.append(result)
+
+    def counts(self) -> dict[str, int]:
+        """Tally rows per outcome; the four known outcomes are always present."""
+        tally = dict.fromkeys(("registered", "updated", "skipped", "invalid"), 0)
+        for entry in self.results:
+            tally[entry.outcome] = tally.get(entry.outcome, 0) + 1
+        return tally
+
+    def render_text(self) -> str:
+        """Return the human-readable report: one line per row, then a summary."""
+        rendered = ["register-from-classification report", ""]
+        for entry in self.results:
+            rendered.append(f"  [{entry.outcome:10}] {entry.slug:30} {entry.detail}")
+            rendered.extend(f"             warn: {note}" for note in entry.warnings)
+        tally = self.counts()
+        summary = " ".join(
+            f"{key}={tally[key]}"
+            for key in ("registered", "updated", "skipped", "invalid")
+        )
+        rendered.extend(["", f"Summary: {summary}"])
+        return "\n".join(rendered)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the summary counts plus every row as plain dicts (for ``--json``)."""
+        serialised = []
+        for entry in self.results:
+            serialised.append(
+                {
+                    "slug": entry.slug,
+                    "path": entry.path,
+                    "outcome": entry.outcome,
+                    "detail": entry.detail,
+                    "warnings": entry.warnings,
+                }
+            )
+        return {"summary": self.counts(), "results": serialised}
+
+
+def _slugify(name: str) -> str:
+    """Lowercase *name*, turn every run of non ``[a-z0-9]`` into ``-``, trim dashes.
+
+    Falls back to ``"repo"`` when nothing usable is left.
+    """
+    dashed = re.sub(r"[^a-z0-9]+", "-", name.lower())
+    trimmed = dashed.strip("-")
+    if not trimmed:
+        return "repo"
+    return trimmed
+
+
+def _parse_classified_at(value: str | None) -> date | None:
+    """Parse the leading ``YYYY-MM-DD`` of *value*; ``None`` if empty or unparsable."""
+    if not value:
+        return None
+    # Only the first ten characters are considered, so a full timestamp works too.
+    iso_day = str(value)[:10]
+    try:
+        parsed = date.fromisoformat(iso_day)
+    except ValueError:
+        return None
+    else:
+        return parsed
+
+
+def _git_value(repo_path: Path, args: list[str]) -> str | None:
+    """Run ``git <args>`` inside *repo_path* and return its stripped stdout.
+
+    Returns ``None`` when git exits non-zero, cannot be started, or prints nothing.
+    """
+    try:
+        completed = subprocess.run(
+            ["git", *args],
+            cwd=repo_path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            text=True,
+            check=True,
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError, OSError):
+        return None
+    output = completed.stdout.strip()
+    return output or None
+
+
+def _git_root(path: Path) -> Path:
+    """Return the git top-level directory for *path*, or the resolved *path* itself
+    when git reports none."""
+    toplevel = _git_value(path, ["rev-parse", "--show-toplevel"])
+    if toplevel:
+        return Path(toplevel)
+    return path.resolve()
+
+
+def _resolve_repo_path_for_host(repo: ManagedRepo) -> str | None:
+    """Pick a checkout directory for *repo* that exists on this machine.
+
+    Preference order: the ``host_paths`` entry for the current hostname, then
+    ``local_path``, then any other ``host_paths`` value that is a directory.
+    """
+    known_paths = repo.host_paths or {}
+    preferred = known_paths.get(socket.gethostname()) or repo.local_path
+    if preferred and Path(preferred).is_dir():
+        return preferred
+    return next(
+        (
+            candidate
+            for candidate in known_paths.values()
+            if candidate and Path(candidate).is_dir()
+        ),
+        None,
+    )
+
+
+def _classification_changed(repo: ManagedRepo, data: ClassificationData, domain_id) -> bool:
+    """Return True when *repo* differs from *data* in domain or any classification column."""
+    if repo.domain_id != domain_id:
+        return True
+    # Empty values are normalised to None, the same way they are stored.
+    expected = {
+        "category": data.category,
+        "secondary_domains": data.secondary_domains or None,
+        "capability_tags": data.capability_tags or None,
+        "business_stake": data.business_stake or None,
+        "business_mechanics": data.business_mechanics or None,
+        "classified_at": _parse_classified_at(data.classified_at),
+        "classified_by": data.classified_by,
+        "standard_version": data.standard_version,
+    }
+    return any(
+        getattr(repo, column) != wanted for column, wanted in expected.items()
+    )
+
+
+def _apply_classification(repo: ManagedRepo, data: ClassificationData, domain_id) -> None:
+    """Copy the market domain and every classification column from *data* onto *repo*."""
+    # Empty lists/strings are stored as None rather than as empty values.
+    assignments = {
+        "domain_id": domain_id,
+        "category": data.category,
+        "secondary_domains": data.secondary_domains or None,
+        "capability_tags": data.capability_tags or None,
+        "business_stake": data.business_stake or None,
+        "business_mechanics": data.business_mechanics or None,
+        "classified_at": _parse_classified_at(data.classified_at),
+        "classified_by": data.classified_by,
+        "standard_version": data.standard_version,
+    }
+    for column, value in assignments.items():
+        setattr(repo, column, value)
+
+
+async def _get_domain_id(session, market_slug: str):
+    """Return the id of the ``Domain`` row whose slug is *market_slug*.
+
+    Raises:
+        ValueError: no such row exists.
+    """
+    query = select(Domain).where(Domain.slug == market_slug)
+    row = (await session.execute(query)).scalar_one_or_none()
+    if row is not None:
+        return row.id
+    raise ValueError(f"Market domain '{market_slug}' not found in domains table")
+
+
+async def _get_repo_by_slug(session, slug: str) -> ManagedRepo | None:
+    """Fetch the ``ManagedRepo`` row with the given *slug*, or ``None`` if absent."""
+    query = select(ManagedRepo).where(ManagedRepo.slug == slug)
+    rows = await session.execute(query)
+    return rows.scalar_one_or_none()
+
+
+def _api_request(
+    method: str,
+    path: str,
+    *,
+    api_base: str,
+    body: dict | None = None,
+) -> tuple[int, Any]:
+    """Send one JSON request to the API and return ``(status_code, payload)``.
+
+    Args:
+        method: HTTP verb, e.g. ``"GET"``.
+        path: Path appended to *api_base* (which has trailing slashes stripped).
+        api_base: Base URL of the API.
+        body: Optional JSON body.
+
+    Returns:
+        ``(status, payload)``. Status ``0`` means no response was obtained
+        (httpx is not installed, or httpx raised an ``HTTPError``); the payload
+        is then ``{"_error": message}``. A 204 yields a ``None`` payload, and a
+        body that is not valid JSON is wrapped as ``{"_raw": text}``.
+    """
+    if not _HAS_HTTPX:
+        return (0, {"_error": "httpx not installed"})
+    url = api_base.rstrip("/") + path
+    try:
+        with httpx.Client(timeout=30.0) as client:
+            response = client.request(method, url, json=body)
+            if response.status_code == 204:
+                return response.status_code, None
+            try:
+                payload = response.json()
+            except ValueError:
+                # JSON decode errors are ValueError subclasses; anything else
+                # is a real bug and should not be hidden behind "_raw".
+                payload = {"_raw": response.text}
+            return response.status_code, payload
+    except httpx.HTTPError as exc:
+        return (0, {"_error": str(exc)})
+
+
+def _refresh_checkout_metadata(
+    repo: ManagedRepo,
+    *,
+    git_root: Path,
+    hostname: str,
+    remote_url: str | None,
+    git_fingerprint: str | None,
+) -> None:
+    """Point *repo* at this host's checkout and refresh git-derived metadata."""
+    repo.local_path = str(git_root)
+    # Build a new dict and assign it rather than mutating the stored one in place.
+    host_paths = dict(repo.host_paths or {})
+    host_paths[hostname] = str(git_root)
+    repo.host_paths = host_paths
+    # Keep the stored values when git could not provide a replacement.
+    if remote_url:
+        repo.remote_url = remote_url
+    if git_fingerprint:
+        repo.git_fingerprint = git_fingerprint
+
+
+async def _upsert_via_db(
+    *,
+    slug: str,
+    repo_path: Path,
+    data: ClassificationData,
+    dry_run: bool,
+    report: RegistrationReport,
+) -> None:
+    """Create or update the ``managed_repos`` row for *slug* through a DB session.
+
+    Exactly one ``RowResult`` is appended to *report* on every path:
+
+    * unknown market domain: ``invalid`` (``skipped`` in dry-run);
+    * no row for *slug*: a new row is created, ``registered``;
+    * classification unchanged: ``skipped``, refreshing paths if ``local_path``
+      differs from the current checkout;
+    * otherwise classification and paths are rewritten, ``updated``.
+
+    With *dry_run* set nothing is written; rows describe what would happen.
+    """
+    git_root = _git_root(repo_path)
+    remote_url = _git_value(git_root, ["remote", "get-url", "origin"])
+    git_fingerprint = _git_value(git_root, ["rev-list", "--max-parents=0", "HEAD"])
+    hostname = socket.gethostname()
+    display_name = git_root.name.replace("-", " ").replace("_", " ").title()
+
+    async with async_session_factory() as session:
+        try:
+            domain_id = await _get_domain_id(session, data.domain)
+        except ValueError as exc:
+            if dry_run:
+                report.add(
+                    RowResult(
+                        slug,
+                        str(git_root),
+                        "skipped",
+                        f"dry-run: {exc}",
+                    )
+                )
+                return
+            report.add(RowResult(slug, str(git_root), "invalid", str(exc)))
+            return
+
+        repo = await _get_repo_by_slug(session, slug)
+        if repo is None:
+            if dry_run:
+                report.add(
+                    RowResult(
+                        slug,
+                        str(git_root),
+                        "registered",
+                        f"would create repo under domain '{data.domain}' (dry-run)",
+                    )
+                )
+                return
+            repo = ManagedRepo(
+                domain_id=domain_id,
+                slug=slug,
+                name=display_name,
+                local_path=str(git_root),
+                host_paths={hostname: str(git_root)},
+                remote_url=remote_url,
+                git_fingerprint=git_fingerprint,
+                status="active",
+            )
+            _apply_classification(repo, data, domain_id)
+            session.add(repo)
+            await session.commit()
+            report.add(
+                RowResult(slug, str(git_root), "registered", f"domain={data.domain}")
+            )
+            return
+
+        if not _classification_changed(repo, data, domain_id):
+            if repo.local_path != str(git_root):
+                if dry_run:
+                    report.add(
+                        RowResult(
+                            slug,
+                            str(git_root),
+                            "skipped",
+                            "classification unchanged; would refresh local_path (dry-run)",
+                        )
+                    )
+                    return
+                _refresh_checkout_metadata(
+                    repo,
+                    git_root=git_root,
+                    hostname=hostname,
+                    remote_url=remote_url,
+                    git_fingerprint=git_fingerprint,
+                )
+                await session.commit()
+                report.add(
+                    RowResult(slug, str(git_root), "skipped", "paths refreshed only")
+                )
+                return
+            report.add(
+                RowResult(slug, str(git_root), "skipped", "classification already current")
+            )
+            return
+
+        if dry_run:
+            report.add(
+                RowResult(
+                    slug,
+                    str(git_root),
+                    "updated",
+                    f"would update classification (domain={data.domain}) (dry-run)",
+                )
+            )
+            return
+
+        _apply_classification(repo, data, domain_id)
+        _refresh_checkout_metadata(
+            repo,
+            git_root=git_root,
+            hostname=hostname,
+            remote_url=remote_url,
+            git_fingerprint=git_fingerprint,
+        )
+        await session.commit()
+        report.add(
+            RowResult(slug, str(git_root), "updated", f"domain={data.domain}")
+        )
+
+
+async def _upsert_via_api(
+    *,
+    slug: str,
+    repo_path: Path,
+    data: ClassificationData,
+    dry_run: bool,
+    api_base: str,
+    report: RegistrationReport,
+) -> None:
+    """Create or update the repo for *slug* through the REST API.
+
+    ``GET /repos/{slug}`` decides the path: a 404 means the repo is created
+    (``POST /repos/`` followed by a classification ``PATCH``); a 2xx means it
+    is patched and this host's path is posted to ``/repos/{slug}/paths``.
+    Any other response is reported as ``invalid`` instead of being mistaken
+    for "not found". Exactly one ``RowResult`` is appended to *report* on
+    every path. With *dry_run* set only the lookup is performed.
+    """
+    git_root = _git_root(repo_path)
+    remote_url = _git_value(git_root, ["remote", "get-url", "origin"])
+    git_fingerprint = _git_value(git_root, ["rev-list", "--max-parents=0", "HEAD"])
+    hostname = socket.gethostname()
+    display_name = git_root.name.replace("-", " ").replace("_", " ").title()
+
+    status, existing = _api_request("GET", f"/repos/{slug}", api_base=api_base)
+    if status == 0:
+        reason = existing.get("_error", existing) if isinstance(existing, dict) else existing
+        report.add(
+            RowResult(
+                slug,
+                str(git_root),
+                "invalid",
+                f"API unreachable: {reason}",
+            )
+        )
+        return
+    if status == 404:
+        existing = None
+    elif not 200 <= status < 300:
+        # Auth failures, validation errors and server errors also carry a
+        # "detail" body; they must not fall through to the create path.
+        detail = existing.get("detail", existing) if isinstance(existing, dict) else existing
+        report.add(
+            RowResult(
+                slug,
+                str(git_root),
+                "invalid",
+                f"GET /repos/{slug} failed (HTTP {status}): {detail}",
+            )
+        )
+        return
+
+    patch_body = {
+        "category": data.category,
+        "secondary_domains": data.secondary_domains,
+        "capability_tags": data.capability_tags,
+        "business_stake": data.business_stake,
+        "business_mechanics": data.business_mechanics,
+        "classified_at": data.classified_at,
+        "classified_by": data.classified_by,
+        "standard_version": data.standard_version,
+        "domain_slug": data.domain,
+        "local_path": str(git_root),
+        "remote_url": remote_url,
+        "git_fingerprint": git_fingerprint,
+    }
+
+    if existing is None:
+        if dry_run:
+            report.add(
+                RowResult(
+                    slug,
+                    str(git_root),
+                    "registered",
+                    f"would POST /repos/ domain={data.domain} (dry-run)",
+                )
+            )
+            return
+        post_body = {
+            "domain_slug": data.domain,
+            "slug": slug,
+            "name": display_name,
+            "local_path": str(git_root),
+            "host_paths": {hostname: str(git_root)},
+            "remote_url": remote_url,
+            "git_fingerprint": git_fingerprint,
+        }
+        code, created = _api_request("POST", "/repos/", api_base=api_base, body=post_body)
+        if code not in (200, 201):
+            detail = created.get("detail", created) if isinstance(created, dict) else created
+            report.add(RowResult(slug, str(git_root), "invalid", f"POST failed: {detail}"))
+            return
+        # The create payload carries no classification fields, so apply them now.
+        code, updated = _api_request(
+            "PATCH", f"/repos/{slug}", api_base=api_base, body=patch_body
+        )
+        if code != 200:
+            detail = updated.get("detail", updated) if isinstance(updated, dict) else updated
+            report.add(
+                RowResult(
+                    slug,
+                    str(git_root),
+                    "invalid",
+                    f"created repo but classification PATCH failed: {detail}",
+                )
+            )
+            return
+        report.add(RowResult(slug, str(git_root), "registered", f"domain={data.domain}"))
+        return
+
+    if dry_run:
+        report.add(
+            RowResult(
+                slug,
+                str(git_root),
+                "updated",
+                f"would PATCH /repos/{slug} domain={data.domain} (dry-run)",
+            )
+        )
+        return
+
+    code, updated = _api_request(
+        "PATCH", f"/repos/{slug}", api_base=api_base, body=patch_body
+    )
+    if code != 200:
+        detail = updated.get("detail", updated) if isinstance(updated, dict) else updated
+        report.add(RowResult(slug, str(git_root), "invalid", f"PATCH failed: {detail}"))
+        return
+
+    # Registering the host path stays best-effort (the row is still "updated"),
+    # but a failure is surfaced as a warning instead of being dropped.
+    row_warnings: list[str] = []
+    path_code, path_payload = _api_request(
+        "POST",
+        f"/repos/{slug}/paths",
+        api_base=api_base,
+        body={"host": hostname, "path": str(git_root)},
+    )
+    if not 200 <= path_code < 300:
+        if isinstance(path_payload, dict):
+            path_detail = path_payload.get(
+                "detail", path_payload.get("_error", path_payload)
+            )
+        else:
+            path_detail = path_payload
+        row_warnings.append(
+            f"host path registration failed (HTTP {path_code}): {path_detail}"
+        )
+    report.add(
+        RowResult(
+            slug,
+            str(git_root),
+            "updated",
+            f"domain={data.domain}",
+            warnings=row_warnings,
+        )
+    )
+
+
+async def register_one(
+    *,
+    slug: str,
+    repo_path: Path,
+    dry_run: bool = False,
+    use_api: bool = False,
+    api_base: str | None = None,
+    report: RegistrationReport | None = None,
+) -> RowResult:
+    """Register or update a single repo from its classification file.
+
+    Args:
+        slug: Repo slug to create or update.
+        repo_path: Any path inside the checkout; the git root is derived from it.
+        dry_run: Report the intended action without writing.
+        use_api: Upsert through the REST API instead of a direct DB session.
+        api_base: API base URL; falls back to ``settings.api_base``.
+        report: Report to append to; a fresh one is created when omitted.
+
+    Returns:
+        The row appended for this repo. Advisory warnings returned by
+        ``load_classification_file`` are attached to it whether or not the
+        file turned out to be valid.
+    """
+    if report is None:
+        report = RegistrationReport()
+    git_root = _git_root(repo_path)
+    data, errors, warnings = load_classification_file(git_root)
+    if data is None:
+        result = RowResult(
+            slug,
+            str(git_root),
+            "invalid",
+            "; ".join(errors) or "classification invalid",
+            warnings=warnings,
+        )
+        report.add(result)
+        return result
+
+    if use_api:
+        await _upsert_via_api(
+            slug=slug,
+            repo_path=git_root,
+            data=data,
+            dry_run=dry_run,
+            api_base=api_base or settings.api_base,
+            report=report,
+        )
+    else:
+        await _upsert_via_db(
+            slug=slug,
+            repo_path=git_root,
+            data=data,
+            dry_run=dry_run,
+            report=report,
+        )
+
+    # Both upsert helpers append exactly one row. Advisories for a valid file
+    # used to be dropped here and so never reached the text or JSON report.
+    result = report.results[-1]
+    result.warnings.extend(warnings or [])
+    return result
+
+
+async def _bulk_targets(session) -> list[tuple[str, str]]:
+    """List ``(slug, path)`` for every active repo whose checkout resolves on this
+    host, ordered by slug."""
+    query = (
+        select(ManagedRepo)
+        .where(ManagedRepo.status == "active")
+        .order_by(ManagedRepo.slug)
+    )
+    active_repos = (await session.execute(query)).scalars().all()
+    resolved = (
+        (repo.slug, _resolve_repo_path_for_host(repo)) for repo in active_repos
+    )
+    # Repos without an accessible checkout are left out.
+    return [(slug, path) for slug, path in resolved if path]
+
+
+async def run_registration(args: argparse.Namespace) -> RegistrationReport:
+    """Run the mode selected on the command line and return the filled report.
+
+    Modes are tried in this order: ``--bulk``, ``--repo-path``, ``--slug``.
+
+    Raises:
+        SystemExit: none of the three modes was requested.
+    """
+    report = RegistrationReport()
+    via_api = args.api and not args.db
+
+    async def _register(slug: str, checkout: Path) -> None:
+        # Every mode ends in the same call; only slug and path differ.
+        await register_one(
+            slug=slug,
+            repo_path=checkout,
+            dry_run=args.dry_run,
+            use_api=via_api,
+            api_base=args.api_base,
+            report=report,
+        )
+
+    if args.bulk:
+        async with async_session_factory() as session:
+            targets = await _bulk_targets(session)
+        if targets:
+            for slug, path in targets:
+                await _register(slug, Path(path))
+        else:
+            report.add(
+                RowResult("(bulk)", "", "skipped", "no active repos with accessible local paths")
+            )
+        return report
+
+    if args.repo_path:
+        checkout = Path(args.repo_path).expanduser().resolve()
+        # An explicit --slug wins over the one derived from the directory name.
+        slug = args.slug or _slugify(_git_root(checkout).name)
+        await _register(slug, checkout)
+        return report
+
+    if not args.slug:
+        raise SystemExit("Specify --repo-path PATH, --slug SLUG, or --bulk")
+
+    async with async_session_factory() as session:
+        repo = await _get_repo_by_slug(session, args.slug)
+    if repo is None:
+        report.add(RowResult(args.slug, "", "invalid", "repo slug not found in DB"))
+        return report
+    path = _resolve_repo_path_for_host(repo)
+    if path:
+        await _register(args.slug, Path(path))
+    else:
+        report.add(
+            RowResult(
+                args.slug,
+                "",
+                "invalid",
+                "no accessible local path (local_path / host_paths)",
+            )
+        )
+    return report
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for the registration script."""
+    parser = argparse.ArgumentParser(
+        description="Register or update managed_repos from .repo-classification.yaml",
+    )
+    parser.add_argument("--repo-path", metavar="PATH", help="Local git checkout path")
+    parser.add_argument(
+        "--slug",
+        metavar="SLUG",
+        help="Registered repo slug (required with --bulk omitted unless --repo-path given)",
+    )
+    # Plain on/off switches, kept in the order they appear in --help.
+    switches = (
+        ("--bulk", "All active registered repos with accessible local paths"),
+        ("--dry-run", "Report actions without writing to DB/API"),
+        ("--api", "Upsert via REST API (default: direct DB session)"),
+        ("--db", "Force direct DB session (overrides --api)"),
+    )
+    for flag, help_text in switches:
+        parser.add_argument(flag, action="store_true", help=help_text)
+    parser.add_argument(
+        "--api-base",
+        default=settings.api_base,
+        help=f"State Hub API base URL (default: {settings.api_base})",
+    )
+    parser.add_argument("--json", action="store_true", help="Emit JSON report")
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point: parse *argv*, run the registration, print the report.
+
+    Returns:
+        ``1`` when at least one row is ``invalid``, otherwise ``0``.
+    """
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    # parser.error() exits the process, so each check acts as a guard clause.
+    if args.bulk and args.repo_path:
+        parser.error("--bulk cannot be combined with --repo-path")
+    if not (args.bulk or args.repo_path or args.slug):
+        parser.error("Specify one of --repo-path PATH, --slug SLUG, or --bulk")
+
+    report = asyncio.run(run_registration(args))
+    rendered = (
+        json.dumps(report.to_dict(), indent=2) if args.json else report.render_text()
+    )
+    print(rendered)
+
+    return 1 if report.counts()["invalid"] else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/scripts/spine_migration_data.py
+++ b/scripts/spine_migration_data.py
@@ -0,0 +1,334 @@
+"""Constants and mappings for STATE-WP-0065 P1 spine migration.
+
+Shared by the Alembic revision and the dry-run report script.
+"""
+from __future__ import annotations
+
+import uuid
+from datetime import date
+
+# Fixed uuid5 namespace used by market_domain_uuid() and old_domain_uuid().
+# Keeping it constant is what makes the derived domain-row UUIDs stable
+# across runs.
+_MARKET_DOMAIN_NAMESPACE = uuid.UUID("8dc7d106-11e2-41df-b512-89ed69d2a65f")
+
+# 14 fixed market domains from Repo Classification Standard v1.0 §6,
+# as (slug, display name) pairs.
+MARKET_DOMAINS: list[tuple[str, str]] = [
+    ("infotech", "Infotech"),
+    ("financials", "Financials"),
+    ("communication", "Communication"),
+    ("consumer", "Consumer"),
+    ("health", "Health"),
+    ("industrials", "Industrials"),
+    ("energy", "Energy"),
+    ("utilities", "Utilities"),
+    ("materials", "Materials"),
+    ("realestate", "Real Estate"),
+    ("crypto", "Crypto"),
+    ("agents", "Agents"),
+    ("space", "Space"),
+    ("government", "Government"),
+]
+
+# Old coordination-domain slugs (pre-migration ``domains`` table) → market domain.
+# Several old slugs may share one market domain. Slugs absent from this map
+# fall back to "infotech" in derive_classification().
+OLD_DOMAIN_TO_MARKET: dict[str, str] = {
+    "custodian": "infotech",
+    "railiance": "financials",
+    "markitect": "communication",
+    "coulomb_social": "communication",
+    "personhood": "government",
+    "foerster_capabilities": "agents",
+    # Extended coordination domains (beyond the original 6 canonical seeds).
+    "capabilities": "agents",
+    "canon": "infotech",
+    "citation_evidence": "infotech",
+    "helix_forge": "infotech",
+    "inter_hub": "infotech",
+    "netkingdom": "communication",
+    "stack": "infotech",
+    "vergabe_teilnahme": "government",
+    "whynot": "consumer",
+    "test_domain_v2": "infotech",
+}
+
+# Best-effort reverse map for downgrade (lossy: many market domains → one old slug).
+MARKET_TO_OLD_DOMAIN: dict[str, str] = {
+    market: old
+    for old, market in OLD_DOMAIN_TO_MARKET.items()
+}
+
+# Legacy coordination domains restored on downgrade.
+# Only the six original seeds are listed; the extended slugs present in
+# OLD_DOMAIN_TO_MARKET are not recreated from this list.
+# NOTE(review): entries look like (slug, display name) pairs, mirroring
+# MARKET_DOMAINS — confirm against the Alembic revision that consumes them.
+OLD_COORDINATION_DOMAINS: list[tuple[str, str]] = [
+    ("custodian", "The Custodian"),
+    ("railiance", "Railiance"),
+    ("markitect", "Markitect"),
+    ("coulomb_social", "Coulomb.social"),
+    ("personhood", "Personhood"),
+    ("foerster_capabilities", "Foerster Capabilities"),
+]
+
+# Human-reviewed classifications for the 11 custodian-domain fixture repos.
+# Keyed by repo slug. Every entry carries the same six fields: ``category``,
+# ``domain`` (primary market-domain slug), ``secondary_domains``,
+# ``capability_tags``, ``business_stake`` and ``business_mechanics``.
+# derive_classification() returns these entries in preference to its
+# heuristics and defaults ``classified_by`` to "human" for them.
+REPO_CLASSIFICATIONS: dict[str, dict] = {
+    "the-custodian": {
+        "category": "research",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": [
+            "governance",
+            "knowledge",
+            "coordination",
+            "policy",
+            "documentation",
+        ],
+        "business_stake": ["technology", "operations", "intelligence", "execution"],
+        "business_mechanics": ["intention", "control", "coordination", "adaptation"],
+    },
+    "inter-hub": {
+        "category": "research",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": [
+            "governance",
+            "observability",
+            "platform",
+            "coordination",
+            "orchestration",
+        ],
+        "business_stake": ["technology", "intelligence", "operations"],
+        "business_mechanics": ["control", "coordination", "adaptation"],
+    },
+    "state-hub": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": [
+            "coordination",
+            "knowledge",
+            "platform",
+            "observability",
+            "governance",
+        ],
+        "business_stake": [
+            "technology",
+            "operations",
+            "product",
+            "intelligence",
+            "automation",
+        ],
+        "business_mechanics": ["coordination", "control", "operation", "adaptation"],
+    },
+    "hub-core": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": ["platform", "configuration", "orchestration"],
+        "business_stake": ["technology", "execution", "product"],
+        "business_mechanics": ["operation"],
+    },
+    "activity-core": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": [
+            "workflow",
+            "orchestration",
+            "automation",
+            "coordination",
+            "observability",
+        ],
+        "business_stake": ["technology", "operations", "automation", "execution"],
+        "business_mechanics": ["coordination", "operation", "adaptation"],
+    },
+    "issue-core": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": ["agents"],
+        "capability_tags": [
+            "workflow",
+            "coordination",
+            "orchestration",
+            "traceability",
+        ],
+        "business_stake": ["technology", "product", "operations", "automation"],
+        "business_mechanics": ["coordination", "operation"],
+    },
+    "kaizen-agentic": {
+        "category": "tooling",
+        "domain": "agents",
+        "secondary_domains": ["infotech"],
+        "capability_tags": [
+            "orchestration",
+            "automation",
+            "coordination",
+            "knowledge",
+            "documentation",
+        ],
+        "business_stake": [
+            "technology",
+            "product",
+            "automation",
+            "people",
+            "intelligence",
+        ],
+        "business_mechanics": [
+            "intention",
+            "coordination",
+            "operation",
+            "adaptation",
+        ],
+    },
+    "llm-connect": {
+        "category": "tooling",
+        "domain": "agents",
+        "secondary_domains": ["infotech"],
+        "capability_tags": [
+            "orchestration",
+            "model-routing",
+            "configuration",
+            "automation",
+        ],
+        "business_stake": ["technology", "product", "automation"],
+        "business_mechanics": ["operation", "adaptation"],
+    },
+    "ops-bridge": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": [
+            "operations",
+            "access-control",
+            "platform",
+            "observability",
+            "orchestration",
+        ],
+        "business_stake": ["operations", "technology", "automation"],
+        "business_mechanics": ["control", "operation", "adaptation"],
+    },
+    "ops-warden": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": [],
+        "capability_tags": [
+            "identity",
+            "access-control",
+            "security",
+            "policy",
+            "audit",
+            "governance",
+        ],
+        "business_stake": ["technology", "operations", "legal", "automation"],
+        "business_mechanics": ["control", "operation"],
+    },
+    "email-connect": {
+        "category": "tooling",
+        "domain": "infotech",
+        "secondary_domains": ["communication"],
+        "capability_tags": [
+            "evidence",
+            "traceability",
+            "source-management",
+            "automation",
+        ],
+        "business_stake": ["technology", "operations", "legal"],
+        "business_mechanics": ["operation", "coordination"],
+    },
+}
+
+# Repo discrepancy resolution (STATE-WP-0065 §P1 data migration).
+# Keyed by the slug of the repo row being resolved. Every entry has an
+# ``action``; ``target_slug`` and ``archive`` are optional and only present
+# on the relink/collapse entries. Note that the underscore slug
+# "vergabe_teilnahme" collapses into the hyphenated "vergabe-teilnahme".
+REPO_DISPOSITIONS: dict[str, dict] = {
+    "markitect-project": {
+        "action": "relink_to",
+        "target_slug": "markitect-main",
+        "archive": True,
+    },
+    "railiance-bootstrap": {
+        "action": "archive",
+    },
+    "railiance-hosts": {
+        "action": "archive",
+    },
+    "vergabe_teilnahme": {
+        "action": "collapse_into",
+        "target_slug": "vergabe-teilnahme",
+        "archive": True,
+    },
+}
+
+# Fallback repo slug for orphan workplans after backfill.
+FALLBACK_REPO_SLUG = "state-hub"
+
+# Version string written as ``standard_version`` by migration_provenance().
+STANDARD_VERSION = "1.0"
+
+
+def market_domain_uuid(slug: str) -> str:
+    """Deterministic UUID string for a market-domain slug."""
+    return str(uuid.uuid5(_MARKET_DOMAIN_NAMESPACE, f"state-hub.market-domain.{slug}"))
+
+
+def old_domain_uuid(slug: str) -> str:
+    """Deterministic UUID string for a legacy coordination-domain slug."""
+    return str(uuid.uuid5(_MARKET_DOMAIN_NAMESPACE, f"state-hub.coordination-domain.{slug}"))
+
+
+def derive_classification(repo_slug: str, old_domain_slug: str | None) -> dict:
+    """Return a classification dict for *repo_slug*.
+
+    Uses committed ``REPO_CLASSIFICATIONS`` when present; otherwise derives a
+    migration-time classification from the old coordination domain.
+    """
+    if repo_slug in REPO_CLASSIFICATIONS:
+        data = dict(REPO_CLASSIFICATIONS[repo_slug])
+        data.setdefault("classified_by", "human")
+        return data
+
+    market = OLD_DOMAIN_TO_MARKET.get(old_domain_slug or "", "infotech")
+
+    # Domain-specific heuristics for repos without committed classification files.
+    category = "project"
+    secondary_domains: list[str] = []
+    capability_tags: list[str] = []
+    business_stake: list[str] = []
+    business_mechanics: list[str] = []
+
+    if old_domain_slug == "custodian":
+        category = "tooling"
+        capability_tags = ["platform"]
+        business_stake = ["technology", "operations"]
+    elif old_domain_slug == "railiance":
+        category = "project"
+        capability_tags = ["platform", "operations"]
+        business_stake = ["technology", "operations"]
+    elif old_domain_slug == "markitect":
+        category = "project"
+        capability_tags = ["knowledge", "documentation"]
+        business_stake = ["technology", "product"]
+    elif old_domain_slug == "coulomb_social":
+        category = "experimental"
+        capability_tags = ["marketplace", "collaboration"]
+        business_stake = ["product", "sales"]
+    elif old_domain_slug == "personhood":
+        category = "research"
+        capability_tags = ["governance", "policy"]
+        business_stake = ["legal", "technology", "intelligence"]
+        business_mechanics = ["intention", "control"]
+    elif old_domain_slug == "foerster_capabilities":
+        category = "research"
+        capability_tags = ["knowledge"]
+        business_stake = ["intelligence", "technology"]
+
+    return {
+        "category": category,
+        "domain": market,
+        "secondary_domains": secondary_domains,
+        "capability_tags": capability_tags,
+        "business_stake": business_stake,
+        "business_mechanics": business_mechanics,
+        "classified_by": "migration",
+    }
+
+
+def migration_provenance() -> dict:
+    """Provenance fields applied during Alembic backfill."""
+    return {
+        "classified_at": date.today().isoformat(),
+        "classified_by": "migration",
+        "standard_version": STANDARD_VERSION,
+    }
--- a/scripts/spine_migration_dry_run.py
+++ b/scripts/spine_migration_dry_run.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python3
+"""Dry-run report for STATE-WP-0065 P1 spine migration.
+
+Prints would-be classification, domain, repo-disposition, and workplan-anchor
+changes without applying them.  Requires a live PostgreSQL connection (same
+DATABASE_URL as the API).
+"""
+from __future__ import annotations
+
+import asyncio
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+from sqlalchemy import text  # noqa: E402
+from sqlalchemy.ext.asyncio import AsyncSession  # noqa: E402
+
+from api.database import async_session_factory, engine  # noqa: E402
+from scripts.spine_migration_data import (  # noqa: E402
+    FALLBACK_REPO_SLUG,
+    MARKET_DOMAINS,
+    OLD_DOMAIN_TO_MARKET,
+    REPO_CLASSIFICATIONS,
+    REPO_DISPOSITIONS,
+    derive_classification,
+    market_domain_uuid,
+)
+
+
+def _section(title: str) -> None:
+    print()
+    print("=" * 72)
+    print(title)
+    print("=" * 72)
+
+
+async def _report_domains(session: AsyncSession) -> None:
+    _section("Domain spine replacement")
+    result = await session.execute(
+        text("SELECT slug, name FROM domains ORDER BY slug")
+    )
+    current = result.fetchall()
+    current_slugs = {row[0] for row in current}
+    print(f"Current domains ({len(current)}):")
+    for slug, name in current:
+        mapped = OLD_DOMAIN_TO_MARKET.get(slug, "(no mapping — would delete)")
+        print(f"  {slug:25} → {mapped}")
+
+    print(f"\nMarket domains to insert ({len(MARKET_DOMAINS)}):")
+    for slug, name in MARKET_DOMAINS:
+        flag = "exists" if slug in current_slugs else "NEW"
+        print(f"  [{flag:5}] {slug:20} {name:20} id={market_domain_uuid(slug)}")
+
+
+async def _report_classifications(session: AsyncSession) -> None:
+    _section("Repo classification backfill")
+    rows = await session.execute(
+        text(
+            """
+            SELECT mr.slug, mr.status, d.slug AS old_domain
+            FROM managed_repos mr
+            JOIN domains d ON d.id = mr.domain_id
+            ORDER BY mr.slug
+            """
+        )
+    )
+    from_file = 0
+    derived = 0
+    for repo_slug, status, old_domain in rows:
+        cls = derive_classification(repo_slug, old_domain)
+        source = "fixture" if repo_slug in REPO_CLASSIFICATIONS else "derived"
+        if source == "fixture":
+            from_file += 1
+        else:
+            derived += 1
+        print(
+            f"  {repo_slug:30} [{status:8}] "
+            f"{old_domain:20} → {cls['category']:12} · {cls['domain']:15} "
+            f"({source}, by={cls.get('classified_by', 'migration')})"
+        )
+    print(f"\nSummary: {from_file} from REPO_CLASSIFICATIONS, {derived} derived")
+
+
+async def _report_dispositions(session: AsyncSession) -> None:
+    _section("Repo dispositions")
+    if not REPO_DISPOSITIONS:
+        print("  (none)")
+        return
+    for slug, disp in REPO_DISPOSITIONS.items():
+        repo = await session.execute(
+            text("SELECT 1 FROM managed_repos WHERE slug = :slug"),
+            {"slug": slug},
+        )
+        managed = repo.fetchone()
+        state = "found" if managed else "MISSING"
+        print(f"  {slug:25} [{state}] action={disp['action']}")
+        if disp.get("target_slug"):
+            print(f"    target: {disp['target_slug']}")
+        if disp.get("archive"):
+            print("    would archive phantom/duplicate row")
+
+
+async def _report_workplan_anchors(session: AsyncSession) -> None:
+    _section("Workplan repo_id backfill (would-be)")
+    rows = await session.execute(
+        text(
+            """
+            SELECT ws.slug, ws.repo_id, t.slug AS topic_slug, d.slug AS domain_slug,
+                   mr.slug AS current_repo
+            FROM workstreams ws
+            LEFT JOIN topics t ON t.id = ws.topic_id
+            LEFT JOIN domains d ON d.id = t.domain_id
+            LEFT JOIN managed_repos mr ON mr.id = ws.repo_id
+            ORDER BY ws.slug
+            """
+        )
+    )
+    null_count = 0
+    for ws_slug, repo_id, topic_slug, domain_slug, current_repo in rows:
+        if repo_id is None:
+            null_count += 1
+            print(
+                f"  NEEDS ANCHOR  {ws_slug:40} topic={topic_slug or '-':20} "
+                f"domain={domain_slug or '-'}"
+            )
+        else:
+            print(f"  ok            {ws_slug:40} repo={current_repo}")
+    print(f"\nWorkstreams with NULL repo_id: {null_count}")
+    if null_count:
+        print(f"Orphans would fall back to: {FALLBACK_REPO_SLUG}")
+
+
+async def _report_topic_domain_updates(session: AsyncSession) -> None:
+    _section("Topic / domain_goal domain_id remapping")
+    for old_slug, market_slug in OLD_DOMAIN_TO_MARKET.items():
+        topic_count = await session.execute(
+            text(
+                """
+                SELECT COUNT(*) FROM topics t
+                JOIN domains d ON d.id = t.domain_id
+                WHERE d.slug = :old_slug
+                """
+            ),
+            {"old_slug": old_slug},
+        )
+        goal_count = await session.execute(
+            text(
+                """
+                SELECT COUNT(*) FROM domain_goals dg
+                JOIN domains d ON d.id = dg.domain_id
+                WHERE d.slug = :old_slug
+                """
+            ),
+            {"old_slug": old_slug},
+        )
+        tc = topic_count.scalar_one()
+        gc = goal_count.scalar_one()
+        if tc or gc:
+            print(f"  {old_slug:22} → {market_slug:15}  topics={tc}  domain_goals={gc}")
+
+
+async def _report_table_renames(session: AsyncSession) -> None:
+    _section("Schema renames (structural)")
+    fk_tables = [
+        "tasks.workstream_id",
+        "decisions.workstream_id",
+        "progress_events.workstream_id",
+        "token_events.workstream_id",
+        "contributions.related_workstream_id",
+        "extension_points.workstream_id",
+        "technical_debt.workstream_id",
+        "capability_requests.requesting_workstream_id",
+        "capability_requests.fulfilling_workstream_id",
+        "workplan_launch_requests.workstream_id",
+    ]
+    for item in fk_tables:
+        print(f"  {item} → {item.replace('workstream', 'workplan')}")
+    print("  workstreams → workplans")
+    print("  workstream_dependencies → workplan_dependencies")
+    print("    from_workstream_id → from_workplan_id")
+    print("    to_workstream_id → to_workplan_id")
+
+
+async def main() -> None:
+    print("STATE-WP-0065 P1 — Spine migration dry-run report")
+    print("(read-only; no changes applied)")
+
+    async with async_session_factory() as session:
+        await _report_domains(session)
+        await _report_classifications(session)
+        await _report_dispositions(session)
+        await _report_workplan_anchors(session)
+        await _report_topic_domain_updates(session)
+        await _report_table_renames(session)
+
+    await engine.dispose()
+    print()
+    print("Dry-run complete. Review the report before running:")
+    print("  alembic upgrade d8e9f0a1b2c3")
+
+
+# Script entry point: drive the async report with a fresh event loop.
+if __name__ == "__main__":
+    asyncio.run(main())