Files
state-hub/scripts/spine_migration_data.py
tegwick 0949d4c0d8 feat(classification-spine): implement STATE-WP-0065 repo-anchored model
Replace the ad-hoc coordination-domain spine with the Repo Classification
Standard: 14 market domains, classification columns on managed_repos, and
workplans anchored by repo_id (topic_id optional).

- Add Alembic migration d8e9f0a1b2c3 with data backfill and workstream→workplan rename
- Add api/classification.py validation and register-from-classification tooling
- Expose workplan-first REST/MCP surface with legacy workstream aliases
- Add C-24 consistency rule and legacy domain frontmatter mapping
- Update dashboard repos page with category/capability/stake filters
- Update orientation docs; mark STATE-WP-0065 finished
2026-06-22 13:52:13 +02:00

334 lines
10 KiB
Python

"""Constants and mappings for STATE-WP-0065 P1 spine migration.
Shared by the Alembic revision and the dry-run report script.
"""
from __future__ import annotations
import uuid
from datetime import date
# Deterministic UUIDs for market-domain rows (stable across runs).
# This fixed namespace is the first argument to ``uuid.uuid5`` in
# ``market_domain_uuid`` and ``old_domain_uuid``; changing it changes every
# UUID those helpers derive.
_MARKET_DOMAIN_NAMESPACE = uuid.UUID("8dc7d106-11e2-41df-b512-89ed69d2a65f")
# The 14 fixed market domains of Repo Classification Standard v1.0 §6.
# Each entry pairs a lowercase slug with its capitalised label; the slugs are
# the values used on the right-hand side of ``OLD_DOMAIN_TO_MARKET``.
MARKET_DOMAINS: list[tuple[str, str]] = [
    ("infotech", "Infotech"),
    ("financials", "Financials"),
    ("communication", "Communication"),
    ("consumer", "Consumer"),
    ("health", "Health"),
    ("industrials", "Industrials"),
    ("energy", "Energy"),
    ("utilities", "Utilities"),
    ("materials", "Materials"),
    ("realestate", "Real Estate"),
    ("crypto", "Crypto"),
    ("agents", "Agents"),
    ("space", "Space"),
    ("government", "Government"),
]
# Old coordination-domain slugs (pre-migration ``domains`` table) → market domain.
# Entry order is significant: the reverse map below keeps the FIRST old slug
# listed for each market domain, so the six canonical seeds must stay ahead of
# the extended domains.
OLD_DOMAIN_TO_MARKET: dict[str, str] = {
    "custodian": "infotech",
    "railiance": "financials",
    "markitect": "communication",
    "coulomb_social": "communication",
    "personhood": "government",
    "foerster_capabilities": "agents",
    # Extended coordination domains (beyond the original 6 canonical seeds).
    "capabilities": "agents",
    "canon": "infotech",
    "citation_evidence": "infotech",
    "helix_forge": "infotech",
    "inter_hub": "infotech",
    "netkingdom": "communication",
    "stack": "infotech",
    "vergabe_teilnahme": "government",
    "whynot": "consumer",
    "test_domain_v2": "infotech",
}

# Best-effort reverse map for downgrade (lossy: several old slugs share one
# market domain, so only one of them can be recovered).
# The first old slug listed for a market domain wins. A plain
# ``{market: old ...}`` comprehension is last-wins and would send e.g.
# "infotech" back to "test_domain_v2" rather than the canonical "custodian"
# seed that ``OLD_COORDINATION_DOMAINS`` restores.
MARKET_TO_OLD_DOMAIN: dict[str, str] = {}
for _old_slug, _market_slug in OLD_DOMAIN_TO_MARKET.items():
    MARKET_TO_OLD_DOMAIN.setdefault(_market_slug, _old_slug)
# Legacy coordination domains restored on downgrade.
# Each entry pairs a slug with its label; the slugs are the first six keys of
# ``OLD_DOMAIN_TO_MARKET``.
OLD_COORDINATION_DOMAINS: list[tuple[str, str]] = [
    ("custodian", "The Custodian"),
    ("railiance", "Railiance"),
    ("markitect", "Markitect"),
    ("coulomb_social", "Coulomb.social"),
    ("personhood", "Personhood"),
    ("foerster_capabilities", "Foerster Capabilities"),
]
# Human-reviewed classifications for the 11 custodian-domain fixture repos,
# keyed by repo slug. Every entry carries the same six fields: ``category``,
# ``domain``, ``secondary_domains``, ``capability_tags``, ``business_stake``
# and ``business_mechanics``. ``derive_classification`` returns these entries
# in preference to its domain heuristics.
REPO_CLASSIFICATIONS: dict[str, dict] = {
    "the-custodian": {
        "category": "research",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "governance",
            "knowledge",
            "coordination",
            "policy",
            "documentation",
        ],
        "business_stake": ["technology", "operations", "intelligence", "execution"],
        "business_mechanics": ["intention", "control", "coordination", "adaptation"],
    },
    "inter-hub": {
        "category": "research",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "governance",
            "observability",
            "platform",
            "coordination",
            "orchestration",
        ],
        "business_stake": ["technology", "intelligence", "operations"],
        "business_mechanics": ["control", "coordination", "adaptation"],
    },
    "state-hub": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "coordination",
            "knowledge",
            "platform",
            "observability",
            "governance",
        ],
        "business_stake": [
            "technology",
            "operations",
            "product",
            "intelligence",
            "automation",
        ],
        "business_mechanics": ["coordination", "control", "operation", "adaptation"],
    },
    "hub-core": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": [],
        "capability_tags": ["platform", "configuration", "orchestration"],
        "business_stake": ["technology", "execution", "product"],
        "business_mechanics": ["operation"],
    },
    "activity-core": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "workflow",
            "orchestration",
            "automation",
            "coordination",
            "observability",
        ],
        "business_stake": ["technology", "operations", "automation", "execution"],
        "business_mechanics": ["coordination", "operation", "adaptation"],
    },
    "issue-core": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["agents"],
        "capability_tags": [
            "workflow",
            "coordination",
            "orchestration",
            "traceability",
        ],
        "business_stake": ["technology", "product", "operations", "automation"],
        "business_mechanics": ["coordination", "operation"],
    },
    "kaizen-agentic": {
        "category": "tooling",
        "domain": "agents",
        "secondary_domains": ["infotech"],
        "capability_tags": [
            "orchestration",
            "automation",
            "coordination",
            "knowledge",
            "documentation",
        ],
        "business_stake": [
            "technology",
            "product",
            "automation",
            "people",
            "intelligence",
        ],
        "business_mechanics": ["intention", "coordination", "operation", "adaptation"],
    },
    "llm-connect": {
        "category": "tooling",
        "domain": "agents",
        "secondary_domains": ["infotech"],
        "capability_tags": [
            "orchestration",
            "model-routing",
            "configuration",
            "automation",
        ],
        "business_stake": ["technology", "product", "automation"],
        "business_mechanics": ["operation", "adaptation"],
    },
    "ops-bridge": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": [],
        "capability_tags": [
            "operations",
            "access-control",
            "platform",
            "observability",
            "orchestration",
        ],
        "business_stake": ["operations", "technology", "automation"],
        "business_mechanics": ["control", "operation", "adaptation"],
    },
    "ops-warden": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": [],
        "capability_tags": [
            "identity",
            "access-control",
            "security",
            "policy",
            "audit",
            "governance",
        ],
        "business_stake": ["technology", "operations", "legal", "automation"],
        "business_mechanics": ["control", "operation"],
    },
    "email-connect": {
        "category": "tooling",
        "domain": "infotech",
        "secondary_domains": ["communication"],
        "capability_tags": [
            "evidence",
            "traceability",
            "source-management",
            "automation",
        ],
        "business_stake": ["technology", "operations", "legal"],
        "business_mechanics": ["operation", "coordination"],
    },
}
# Repo discrepancy resolution (STATE-WP-0065 §P1 data migration).
# Each key names a repo and its value the ``action`` to take: ``archive``,
# or ``relink_to`` / ``collapse_into`` a ``target_slug`` (both of which are
# also flagged ``archive``).
REPO_DISPOSITIONS: dict[str, dict] = {
    "markitect-project": {
        "action": "relink_to",
        "target_slug": "markitect-main",
        "archive": True,
    },
    "railiance-bootstrap": {"action": "archive"},
    "railiance-hosts": {"action": "archive"},
    "vergabe_teilnahme": {
        "action": "collapse_into",
        "target_slug": "vergabe-teilnahme",
        "archive": True,
    },
}
# Fallback repo slug for orphan workplans after backfill.
FALLBACK_REPO_SLUG = "state-hub"
# Version string reported as ``standard_version`` by ``migration_provenance``.
# NOTE(review): presumably the Repo Classification Standard version — confirm.
STANDARD_VERSION = "1.0"
def market_domain_uuid(slug: str) -> str:
    """Return the deterministic UUID string for a market-domain slug.

    The UUID is a version-5 (name-based) UUID computed from the module's fixed
    namespace and the name ``state-hub.market-domain.<slug>``, so the same
    slug always yields the same string.
    """
    qualified_name = f"state-hub.market-domain.{slug}"
    derived = uuid.uuid5(_MARKET_DOMAIN_NAMESPACE, qualified_name)
    return str(derived)
def old_domain_uuid(slug: str) -> str:
    """Return the deterministic UUID string for a legacy coordination-domain slug.

    The UUID is a version-5 (name-based) UUID computed from the module's fixed
    namespace and the name ``state-hub.coordination-domain.<slug>``. The name
    prefix differs from the one used for market domains, so identical slugs in
    the two spaces produce different UUIDs.
    """
    qualified_name = f"state-hub.coordination-domain.{slug}"
    derived = uuid.uuid5(_MARKET_DOMAIN_NAMESPACE, qualified_name)
    return str(derived)
# Domain-specific heuristics for repos without committed classification files,
# keyed by legacy coordination-domain slug. Each value is
# (category, capability_tags, business_stake, business_mechanics). Tuples keep
# the table immutable; ``derive_classification`` converts them to fresh lists.
_DOMAIN_HEURISTICS: dict[
    str, tuple[str, tuple[str, ...], tuple[str, ...], tuple[str, ...]]
] = {
    "custodian": (
        "tooling",
        ("platform",),
        ("technology", "operations"),
        (),
    ),
    "railiance": (
        "project",
        ("platform", "operations"),
        ("technology", "operations"),
        (),
    ),
    "markitect": (
        "project",
        ("knowledge", "documentation"),
        ("technology", "product"),
        (),
    ),
    "coulomb_social": (
        "experimental",
        ("marketplace", "collaboration"),
        ("product", "sales"),
        (),
    ),
    "personhood": (
        "research",
        ("governance", "policy"),
        ("legal", "technology", "intelligence"),
        ("intention", "control"),
    ),
    "foerster_capabilities": (
        "research",
        ("knowledge",),
        ("intelligence", "technology"),
        (),
    ),
}

# Used for any slug (or ``None``) that has no entry in ``_DOMAIN_HEURISTICS``.
_DEFAULT_HEURISTIC: tuple[str, tuple[str, ...], tuple[str, ...], tuple[str, ...]] = (
    "project",
    (),
    (),
    (),
)


def derive_classification(repo_slug: str, old_domain_slug: str | None) -> dict:
    """Return a classification dict for *repo_slug*.

    Uses the committed ``REPO_CLASSIFICATIONS`` entry when present (tagged
    ``classified_by="human"`` unless the entry already sets that key);
    otherwise derives a migration-time classification from the old
    coordination domain and tags it ``classified_by="migration"``.

    Args:
        repo_slug: Repo slug looked up in ``REPO_CLASSIFICATIONS``.
        old_domain_slug: Legacy coordination-domain slug, or ``None``. A
            missing or unmapped slug falls back to the ``"infotech"`` market
            domain; a slug without a heuristic entry gets category
            ``"project"`` and empty tag lists.

    Returns:
        A new dict. Its list values are independent copies, so callers may
        mutate the result without altering the module-level tables.
    """
    committed = REPO_CLASSIFICATIONS.get(repo_slug)
    if committed is not None:
        # A plain ``dict(committed)`` is shallow: the returned lists would be
        # the very objects stored in REPO_CLASSIFICATIONS, and an in-place
        # edit by one caller would leak into every later lookup.
        data = {
            key: list(value) if isinstance(value, list) else value
            for key, value in committed.items()
        }
        data.setdefault("classified_by", "human")
        return data

    lookup_slug = old_domain_slug or ""
    market = OLD_DOMAIN_TO_MARKET.get(lookup_slug, "infotech")
    category, capability_tags, business_stake, business_mechanics = (
        _DOMAIN_HEURISTICS.get(lookup_slug, _DEFAULT_HEURISTIC)
    )
    return {
        "category": category,
        "domain": market,
        # No heuristic assigns secondary domains.
        "secondary_domains": [],
        "capability_tags": list(capability_tags),
        "business_stake": list(business_stake),
        "business_mechanics": list(business_mechanics),
        "classified_by": "migration",
    }
def migration_provenance() -> dict:
    """Build the provenance fields applied during the Alembic backfill.

    Returns a new dict on every call with three keys: ``classified_at``
    (today's local date in ISO format), ``classified_by`` (always
    ``"migration"``) and ``standard_version`` (the module's
    ``STANDARD_VERSION``).
    """
    today_iso = date.today().isoformat()
    return dict(
        classified_at=today_iso,
        classified_by="migration",
        standard_version=STANDARD_VERSION,
    )