generated from coulomb/repo-seed
feat(classification-spine): implement STATE-WP-0065 repo-anchored model
Replace the ad-hoc coordination-domain spine with the Repo Classification Standard: 14 market domains, classification columns on managed_repos, and workplans anchored by repo_id (topic_id optional).

- Add Alembic migration d8e9f0a1b2c3 with data backfill and workstream→workplan rename
- Add api/classification.py validation and register-from-classification tooling
- Expose workplan-first REST/MCP surface with legacy workstream aliases
- Add C-24 consistency rule and legacy domain frontmatter mapping
- Update dashboard repos page with category/capability/stake filters
- Update orientation docs; mark STATE-WP-0065 finished
This commit is contained in:
635
scripts/register_from_classification.py
Normal file
635
scripts/register_from_classification.py
Normal file
@@ -0,0 +1,635 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Idempotent registration from committed ``.repo-classification.yaml`` (STATE-WP-0065 P3).
|
||||
|
||||
Reads classification from a repo checkout, validates against the canon allowed-values,
|
||||
and upserts the ``managed_repos`` row (create or update classification + market domain).
|
||||
|
||||
Usage:
|
||||
python scripts/register_from_classification.py --repo-path /path/to/repo [--dry-run]
|
||||
python scripts/register_from_classification.py --slug state-hub [--dry-run]
|
||||
python scripts/register_from_classification.py --bulk [--dry-run]
|
||||
python scripts/register_from_classification.py --help
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
_REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
if str(_REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_REPO_ROOT))
|
||||
|
||||
from sqlalchemy import select # noqa: E402
|
||||
|
||||
from api.classification import ( # noqa: E402
|
||||
CLASSIFICATION_FILENAME,
|
||||
ClassificationData,
|
||||
load_classification_file,
|
||||
)
|
||||
from api.config import settings # noqa: E402
|
||||
from api.database import async_session_factory, engine # noqa: E402
|
||||
from api.models.domain import Domain # noqa: E402
|
||||
from api.models.managed_repo import ManagedRepo # noqa: E402
|
||||
|
||||
try:
|
||||
import httpx
|
||||
|
||||
_HAS_HTTPX = True
|
||||
except ImportError:
|
||||
_HAS_HTTPX = False
|
||||
|
||||
# Closed set of outcomes a single repo can end up with in a registration run.
Outcome = Literal[
    "registered",
    "updated",
    "skipped",
    "invalid",
]


@dataclass
class RowResult:
    """Result of processing one repo during a registration run.

    Instances are built positionally as ``RowResult(slug, path, outcome, detail)``.
    """

    slug: str  # slug of the repo this row describes
    path: str  # filesystem path reported for the repo
    outcome: Outcome  # which of the four outcomes applied
    detail: str = ""  # free-text explanation printed next to the outcome
    warnings: list[str] = field(default_factory=list)  # non-fatal notes for this row
|
||||
|
||||
|
||||
@dataclass
class RegistrationReport:
    """Ordered list of per-repo results plus text and dict renderings."""

    results: list[RowResult] = field(default_factory=list)

    def add(self, result: RowResult) -> None:
        """Append *result* to the end of the report."""
        self.results.append(result)

    def counts(self) -> dict[str, int]:
        """Return how many rows ended in each outcome.

        The four known outcomes are always present (zero when unused); any
        other outcome value found on a row is tallied under its own key.
        """
        tally = dict.fromkeys(("registered", "updated", "skipped", "invalid"), 0)
        for entry in self.results:
            tally[entry.outcome] = tally.get(entry.outcome, 0) + 1
        return tally

    def render_text(self) -> str:
        """Render the report as plain text: one line per row, then a summary."""
        out = ["register-from-classification report", ""]
        for entry in self.results:
            out.append(f" [{entry.outcome:10}] {entry.slug:30} {entry.detail}")
            out.extend(f" warn: {note}" for note in entry.warnings)
        tally = self.counts()
        summary = (
            "Summary: "
            f"registered={tally['registered']} "
            f"updated={tally['updated']} "
            f"skipped={tally['skipped']} "
            f"invalid={tally['invalid']}"
        )
        out += ["", summary]
        return "\n".join(out)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serialisable mapping with ``summary`` and ``results``."""

        def as_mapping(entry: RowResult) -> dict[str, Any]:
            return {
                "slug": entry.slug,
                "path": entry.path,
                "outcome": entry.outcome,
                "detail": entry.detail,
                "warnings": entry.warnings,
            }

        return {
            "summary": self.counts(),
            "results": [as_mapping(entry) for entry in self.results],
        }
|
||||
|
||||
|
||||
def _slugify(name: str) -> str:
|
||||
slug = re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")
|
||||
return slug or "repo"
|
||||
|
||||
|
||||
def _parse_classified_at(value: str | None) -> date | None:
|
||||
if not value:
|
||||
return None
|
||||
try:
|
||||
return date.fromisoformat(str(value)[:10])
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _git_value(repo_path: Path, args: list[str]) -> str | None:
|
||||
try:
|
||||
return subprocess.check_output(
|
||||
["git", *args],
|
||||
cwd=repo_path,
|
||||
stderr=subprocess.DEVNULL,
|
||||
text=True,
|
||||
).strip() or None
|
||||
except (subprocess.CalledProcessError, FileNotFoundError, OSError):
|
||||
return None
|
||||
|
||||
|
||||
def _git_root(path: Path) -> Path:
    """Return the git top-level directory containing *path*.

    Falls back to the resolved *path* itself when ``git rev-parse
    --show-toplevel`` yields nothing.
    """
    toplevel = _git_value(path, ["rev-parse", "--show-toplevel"])
    if toplevel:
        return Path(toplevel)
    return path.resolve()
|
||||
|
||||
|
||||
def _resolve_repo_path_for_host(repo: ManagedRepo) -> str | None:
|
||||
hostname = socket.gethostname()
|
||||
host_paths = repo.host_paths or {}
|
||||
path = host_paths.get(hostname) or repo.local_path
|
||||
if path and Path(path).is_dir():
|
||||
return path
|
||||
for candidate in host_paths.values():
|
||||
if candidate and Path(candidate).is_dir():
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def _classification_changed(repo: ManagedRepo, data: ClassificationData, domain_id) -> bool:
    """Report whether *repo* differs from the classification in *data*.

    Compares the domain id first, then each classification column. Empty
    collection or string values in *data* are normalised to ``None`` before
    comparing, mirroring how they are written by ``_apply_classification``.
    """
    if repo.domain_id != domain_id:
        return True
    expected = {
        "category": data.category,
        "secondary_domains": data.secondary_domains or None,
        "capability_tags": data.capability_tags or None,
        "business_stake": data.business_stake or None,
        "business_mechanics": data.business_mechanics or None,
        "classified_at": _parse_classified_at(data.classified_at),
        "classified_by": data.classified_by,
        "standard_version": data.standard_version,
    }
    return any(getattr(repo, column) != wanted for column, wanted in expected.items())
|
||||
|
||||
|
||||
def _apply_classification(repo: ManagedRepo, data: ClassificationData, domain_id) -> None:
    """Copy the domain id and all classification fields from *data* onto *repo*.

    Falsy optional values (empty lists or strings) are stored as ``None``;
    ``classified_at`` is stored as a parsed ``date`` or ``None``.
    """
    assignments = {
        "domain_id": domain_id,
        "category": data.category,
        "secondary_domains": data.secondary_domains or None,
        "capability_tags": data.capability_tags or None,
        "business_stake": data.business_stake or None,
        "business_mechanics": data.business_mechanics or None,
        "classified_at": _parse_classified_at(data.classified_at),
        "classified_by": data.classified_by,
        "standard_version": data.standard_version,
    }
    for column, value in assignments.items():
        setattr(repo, column, value)
|
||||
|
||||
|
||||
async def _get_domain_id(session, market_slug: str):
    """Look up the ``Domain`` row whose slug is *market_slug* and return its id.

    Raises:
        ValueError: if no domain with that slug exists.
    """
    query = select(Domain).where(Domain.slug == market_slug)
    domain = (await session.execute(query)).scalar_one_or_none()
    if domain is not None:
        return domain.id
    raise ValueError(f"Market domain '{market_slug}' not found in domains table")
|
||||
|
||||
|
||||
async def _get_repo_by_slug(session, slug: str) -> ManagedRepo | None:
    """Return the ``ManagedRepo`` row with the given *slug*, or ``None``."""
    query = select(ManagedRepo).where(ManagedRepo.slug == slug)
    rows = await session.execute(query)
    return rows.scalar_one_or_none()
|
||||
|
||||
|
||||
def _api_request(
    method: str,
    path: str,
    *,
    api_base: str,
    body: dict | None = None,
) -> tuple[int, Any]:
    """Send one JSON request to the API and return ``(status_code, payload)``.

    Args:
        method: HTTP method name.
        path: Path appended to *api_base* (expected to start with ``/``).
        api_base: Base URL; a trailing slash is stripped before joining.
        body: Optional JSON body.

    Returns:
        ``(status, payload)`` where *payload* is the decoded JSON body,
        ``None`` for a 204 response, or ``{"_raw": text}`` when the body is
        not valid JSON. Transport-level failures never raise: they are
        returned as ``(0, {"_error": message})``, as is a missing ``httpx``
        installation.
    """
    if not _HAS_HTTPX:
        return (0, {"_error": "httpx not installed"})
    url = api_base.rstrip("/") + path
    try:
        with httpx.Client(timeout=30.0) as client:
            response = client.request(method, url, json=body)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # InvalidURL is not an HTTPError subclass; without it a malformed
        # base URL would crash the whole run instead of yielding status 0.
        return (0, {"_error": str(exc)})

    if response.status_code == 204:
        return response.status_code, None
    try:
        payload = response.json()
    except ValueError:
        # JSON and unicode decode errors are both ValueError subclasses.
        payload = {"_raw": response.text}
    return response.status_code, payload
|
||||
|
||||
|
||||
async def _upsert_via_db(
    *,
    slug: str,
    repo_path: Path,
    data: ClassificationData,
    dry_run: bool,
    report: RegistrationReport,
) -> None:
    """Create or update the ``managed_repos`` row for *slug* via a DB session.

    Exactly one ``RowResult`` is added to *report*:

    * ``invalid`` when the market domain is unknown (``skipped`` under dry-run),
    * ``registered`` when no row exists for *slug* and one is (or would be) created,
    * ``skipped`` when the classification is unchanged (the recorded paths
      are refreshed if ``local_path`` differs from the checkout root),
    * ``updated`` when the classification is (or would be) rewritten.

    Nothing is committed when *dry_run* is true.
    """
    checkout = _git_root(repo_path)
    checkout_str = str(checkout)
    origin_url = _git_value(checkout, ["remote", "get-url", "origin"])
    root_commit = _git_value(checkout, ["rev-list", "--max-parents=0", "HEAD"])
    this_host = socket.gethostname()
    title = checkout.name.replace("-", " ").replace("_", " ").title()

    def record(outcome: Outcome, detail: str) -> None:
        report.add(RowResult(slug, checkout_str, outcome, detail))

    def refresh_location(row: ManagedRepo) -> None:
        # Point the row at this checkout; keep existing remote/fingerprint
        # when git could not provide fresh values.
        row.local_path = checkout_str
        paths = dict(row.host_paths or {})
        paths[this_host] = checkout_str
        row.host_paths = paths
        if origin_url:
            row.remote_url = origin_url
        if root_commit:
            row.git_fingerprint = root_commit

    async with async_session_factory() as session:
        try:
            domain_id = await _get_domain_id(session, data.domain)
        except ValueError as exc:
            if dry_run:
                record("skipped", f"dry-run: {exc}")
            else:
                record("invalid", str(exc))
            return

        repo = await _get_repo_by_slug(session, slug)

        # --- no row yet: create -------------------------------------------
        if repo is None:
            if dry_run:
                record(
                    "registered",
                    f"would create repo under domain '{data.domain}' (dry-run)",
                )
                return
            repo = ManagedRepo(
                domain_id=domain_id,
                slug=slug,
                name=title,
                local_path=checkout_str,
                host_paths={this_host: checkout_str},
                remote_url=origin_url,
                git_fingerprint=root_commit,
                status="active",
            )
            _apply_classification(repo, data, domain_id)
            session.add(repo)
            await session.commit()
            record("registered", f"domain={data.domain}")
            return

        # --- row exists, classification identical -------------------------
        if not _classification_changed(repo, data, domain_id):
            if repo.local_path == checkout_str:
                record("skipped", "classification already current")
            elif dry_run:
                record(
                    "skipped",
                    "classification unchanged; would refresh local_path (dry-run)",
                )
            else:
                refresh_location(repo)
                await session.commit()
                record("skipped", "paths refreshed only")
            return

        # --- row exists, classification differs ---------------------------
        if dry_run:
            record(
                "updated",
                f"would update classification (domain={data.domain}) (dry-run)",
            )
            return

        _apply_classification(repo, data, domain_id)
        refresh_location(repo)
        await session.commit()
        record("updated", f"domain={data.domain}")
|
||||
|
||||
|
||||
async def _upsert_via_api(
    *,
    slug: str,
    repo_path: Path,
    data: ClassificationData,
    dry_run: bool,
    api_base: str,
    report: RegistrationReport,
) -> None:
    """Create or update the repo for *slug* through the REST API.

    Exactly one ``RowResult`` is added to *report*. The existence probe
    (``GET /repos/{slug}``) is classified strictly by status code:

    * ``0`` (transport failure) -> ``invalid`` "API unreachable",
    * ``404`` -> the repo does not exist and is created (POST, then PATCH
      for the classification fields),
    * any other non-2xx -> ``invalid``; the run does not guess whether the
      repo exists. Previously every error body carrying a ``detail`` key
      (500, 401, 422, ...) was mistaken for "not found" and triggered a
      create attempt or a misleading dry-run report.
    * 2xx -> the repo exists and is PATCHed.

    After a successful update the current host's path is registered via
    ``POST /repos/{slug}/paths``; a failure there is attached to the row as
    a warning instead of being silently ignored.

    No write request is issued when *dry_run* is true.
    """
    git_root = _git_root(repo_path)
    root = str(git_root)
    remote_url = _git_value(git_root, ["remote", "get-url", "origin"])
    git_fingerprint = _git_value(git_root, ["rev-list", "--max-parents=0", "HEAD"])
    hostname = socket.gethostname()
    display_name = git_root.name.replace("-", " ").replace("_", " ").title()

    def _detail(payload: Any) -> Any:
        # Prefer the API's "detail", then the transport "_error", else the raw payload.
        if isinstance(payload, dict):
            for key in ("detail", "_error"):
                if key in payload:
                    return payload[key]
        return payload

    def _record(outcome: Outcome, detail: str, warnings: list[str] | None = None) -> None:
        report.add(RowResult(slug, root, outcome, detail, warnings=list(warnings or [])))

    status, existing = _api_request("GET", f"/repos/{slug}", api_base=api_base)
    if status == 0:
        _record("invalid", f"API unreachable: {_detail(existing)}")
        return
    if status == 404:
        existing = None
    elif not 200 <= status < 300:
        _record("invalid", f"GET /repos/{slug} failed (HTTP {status}): {_detail(existing)}")
        return

    patch_body = {
        "category": data.category,
        "secondary_domains": data.secondary_domains,
        "capability_tags": data.capability_tags,
        "business_stake": data.business_stake,
        "business_mechanics": data.business_mechanics,
        "classified_at": data.classified_at,
        "classified_by": data.classified_by,
        "standard_version": data.standard_version,
        "domain_slug": data.domain,
        "local_path": root,
        "remote_url": remote_url,
        "git_fingerprint": git_fingerprint,
    }

    if existing is None:
        if dry_run:
            _record("registered", f"would POST /repos/ domain={data.domain} (dry-run)")
            return
        post_body = {
            "domain_slug": data.domain,
            "slug": slug,
            "name": display_name,
            "local_path": root,
            "host_paths": {hostname: root},
            "remote_url": remote_url,
            "git_fingerprint": git_fingerprint,
        }
        code, created = _api_request("POST", "/repos/", api_base=api_base, body=post_body)
        if code not in (200, 201):
            _record("invalid", f"POST failed: {_detail(created)}")
            return
        # The create call carries no classification fields; apply them now.
        code, updated = _api_request(
            "PATCH", f"/repos/{slug}", api_base=api_base, body=patch_body
        )
        if code != 200:
            _record(
                "invalid",
                f"created repo but classification PATCH failed: {_detail(updated)}",
            )
            return
        _record("registered", f"domain={data.domain}")
        return

    if dry_run:
        _record("updated", f"would PATCH /repos/{slug} domain={data.domain} (dry-run)")
        return

    code, updated = _api_request(
        "PATCH", f"/repos/{slug}", api_base=api_base, body=patch_body
    )
    if code != 200:
        _record("invalid", f"PATCH failed: {_detail(updated)}")
        return

    # Best-effort: the classification update already succeeded, so a failure
    # here is reported as a warning rather than flipping the row to invalid.
    path_code, path_payload = _api_request(
        "POST",
        f"/repos/{slug}/paths",
        api_base=api_base,
        body={"host": hostname, "path": root},
    )
    warnings: list[str] = []
    if not 200 <= path_code < 300:
        warnings.append(
            f"host path registration failed (HTTP {path_code}): {_detail(path_payload)}"
        )
    _record("updated", f"domain={data.domain}", warnings)
|
||||
|
||||
|
||||
async def register_one(
    *,
    slug: str,
    repo_path: Path,
    dry_run: bool = False,
    use_api: bool = False,
    api_base: str | None = None,
    report: RegistrationReport | None = None,
) -> RowResult:
    """Register or update a single repo from its classification file.

    Args:
        slug: Repo slug to create or update.
        repo_path: Any path inside the checkout; the git root is resolved
            from it.
        dry_run: Report what would happen without writing.
        use_api: Go through the REST API instead of a direct DB session.
        api_base: API base URL; defaults to ``settings.api_base``.
        report: Report to append to; a fresh one is created when omitted.

    Returns:
        The ``RowResult`` appended to the report for this repo. Warnings
        returned by ``load_classification_file`` are attached to it for
        valid classifications too (they used to be dropped unless the file
        was invalid).
    """
    if report is None:
        report = RegistrationReport()
    git_root = _git_root(repo_path)
    data, errors, warnings = load_classification_file(git_root)
    if data is None:
        result = RowResult(
            slug,
            str(git_root),
            "invalid",
            "; ".join(errors) or "classification invalid",
            warnings=warnings,
        )
        report.add(result)
        return result

    if use_api:
        await _upsert_via_api(
            slug=slug,
            repo_path=git_root,
            data=data,
            dry_run=dry_run,
            api_base=api_base or settings.api_base,
            report=report,
        )
    else:
        await _upsert_via_db(
            slug=slug,
            repo_path=git_root,
            data=data,
            dry_run=dry_run,
            report=report,
        )

    # Both upsert paths append exactly one row for this repo.
    result = report.results[-1]
    if warnings:
        result.warnings.extend(warnings)
    return result
|
||||
|
||||
|
||||
async def _bulk_targets(session) -> list[tuple[str, str]]:
    """Return ``(slug, path)`` for every active repo reachable on this host.

    Repos are taken in slug order; those for which
    ``_resolve_repo_path_for_host`` finds no path are left out.
    """
    query = (
        select(ManagedRepo)
        .where(ManagedRepo.status == "active")
        .order_by(ManagedRepo.slug)
    )
    active_repos = (await session.execute(query)).scalars().all()
    resolved = ((repo.slug, _resolve_repo_path_for_host(repo)) for repo in active_repos)
    return [(slug, path) for slug, path in resolved if path]
|
||||
|
||||
|
||||
async def run_registration(args: argparse.Namespace) -> RegistrationReport:
    """Run the registration mode selected by *args* and return the report.

    Modes are checked in this order: ``--bulk`` (all active repos with an
    accessible path), ``--repo-path`` (slug from ``--slug`` or derived from
    the git root's directory name), then ``--slug`` alone (path looked up in
    the DB).

    Raises:
        SystemExit: if none of the three selectors is set.
    """
    report = RegistrationReport()
    use_api = args.api and not args.db

    async def process(slug: str, checkout: Path) -> None:
        await register_one(
            slug=slug,
            repo_path=checkout,
            dry_run=args.dry_run,
            use_api=use_api,
            api_base=args.api_base,
            report=report,
        )

    if args.bulk:
        async with async_session_factory() as session:
            targets = await _bulk_targets(session)
        if targets:
            for slug, path in targets:
                await process(slug, Path(path))
        else:
            report.add(
                RowResult("(bulk)", "", "skipped", "no active repos with accessible local paths")
            )
        return report

    if args.repo_path:
        repo_path = Path(args.repo_path).expanduser().resolve()
        slug = args.slug or _slugify(_git_root(repo_path).name)
        await process(slug, repo_path)
        return report

    if not args.slug:
        raise SystemExit("Specify --repo-path PATH, --slug SLUG, or --bulk")

    # Slug only: the checkout location has to come from the registered row.
    async with async_session_factory() as session:
        repo = await _get_repo_by_slug(session, args.slug)
        if repo is None:
            report.add(RowResult(args.slug, "", "invalid", "repo slug not found in DB"))
            return report
        path = _resolve_repo_path_for_host(repo)
    if not path:
        report.add(
            RowResult(
                args.slug,
                "",
                "invalid",
                "no accessible local path (local_path / host_paths)",
            )
        )
        return report
    await process(args.slug, Path(path))
    return report
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the registration script.

    Options are added in a fixed order (``--repo-path``, ``--slug``,
    ``--bulk``, ``--dry-run``, ``--api``, ``--db``, ``--api-base``,
    ``--json``), which is also the order shown by ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Register or update managed_repos from .repo-classification.yaml",
    )
    parser.add_argument("--repo-path", metavar="PATH", help="Local git checkout path")
    parser.add_argument(
        "--slug",
        metavar="SLUG",
        help="Registered repo slug (required with --bulk omitted unless --repo-path given)",
    )

    # Plain on/off switches, declared as (flag, help) pairs.
    switches = (
        ("--bulk", "All active registered repos with accessible local paths"),
        ("--dry-run", "Report actions without writing to DB/API"),
        ("--api", "Upsert via REST API (default: direct DB session)"),
        ("--db", "Force direct DB session (overrides --api)"),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)

    parser.add_argument(
        "--api-base",
        default=settings.api_base,
        help=f"State Hub API base URL (default: {settings.api_base})",
    )
    parser.add_argument("--json", action="store_true", help="Emit JSON report")
    return parser
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Parse *argv*, run the registration and print the report.

    Returns:
        ``1`` when at least one row ended up ``invalid``, otherwise ``0``.
        Invalid option combinations exit through ``parser.error``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Exactly one way of selecting repos must be usable.
    if args.bulk and args.repo_path:
        parser.error("--bulk cannot be combined with --repo-path")
    if not (args.bulk or args.repo_path or args.slug):
        parser.error("Specify one of --repo-path PATH, --slug SLUG, or --bulk")

    report = asyncio.run(run_registration(args))
    rendered = json.dumps(report.to_dict(), indent=2) if args.json else report.render_text()
    print(rendered)

    return 1 if report.counts()["invalid"] else 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user