feat(classification-spine): implement STATE-WP-0065 repo-anchored model

Replace the ad-hoc coordination-domain spine with the Repo Classification
Standard: 14 market domains, classification columns on managed_repos, and
workplans anchored by repo_id (topic_id optional).

- Add Alembic migration d8e9f0a1b2c3 with data backfill and workstream→workplan rename
- Add api/classification.py validation and register-from-classification tooling
- Expose workplan-first REST/MCP surface with legacy workstream aliases
- Add C-24 consistency rule and legacy domain frontmatter mapping
- Update dashboard repos page with category/capability/stake filters
- Update orientation docs; mark STATE-WP-0065 finished
This commit is contained in:
2026-06-22 13:52:13 +02:00
parent 279be4ffbd
commit 0949d4c0d8
84 changed files with 4494 additions and 1111 deletions

View File

@@ -11,7 +11,7 @@ from api.database import get_session
from api.models.managed_repo import ManagedRepo
from api.models.task import Task
from api.models.token_event import TokenEvent
from api.models.workstream import Workstream
from api.models.workplan import Workplan
from api.schemas.token_event import (
RepoTokenSummary,
TokenAggregateRow,
@@ -102,14 +102,14 @@ def _apply_event_defaults(data: dict[str, Any]) -> dict[str, Any]:
async def _populate_relationship_defaults(data: dict[str, Any], session: AsyncSession) -> dict[str, Any]:
# Auto-populate workplan_id from task if not provided
if data.get("task_id") and not data.get("workstream_id"):
if data.get("task_id") and not data.get("workplan_id"):
task = await session.get(Task, data["task_id"])
if task:
data["workstream_id"] = task.workstream_id
data["workplan_id"] = task.workplan_id
# Auto-populate repo_id from workplan if not provided
if data.get("workstream_id") and not data.get("repo_id"):
ws = await session.get(Workstream, data["workstream_id"])
if data.get("workplan_id") and not data.get("repo_id"):
ws = await session.get(Workplan, data["workplan_id"])
if ws and ws.repo_id:
data["repo_id"] = ws.repo_id
return data
@@ -169,7 +169,7 @@ def _filter_query(
if task_id:
q = q.where(TokenEvent.task_id == task_id)
if workstream_id:
q = q.where(TokenEvent.workstream_id == workstream_id)
q = q.where(TokenEvent.workplan_id == workstream_id)
if repo_id:
q = q.where(TokenEvent.repo_id == repo_id)
if ref_type:
@@ -195,7 +195,7 @@ def _filter_query(
if unattributed:
q = q.where(
TokenEvent.repo_id.is_(None),
TokenEvent.workstream_id.is_(None),
TokenEvent.workplan_id.is_(None),
TokenEvent.task_id.is_(None),
)
return q
@@ -238,7 +238,7 @@ async def get_token_summary(
uid = uuid.UUID(id)
except ValueError:
raise HTTPException(status_code=422, detail="id must be a valid UUID for scope=workstream")
q = q.where(TokenEvent.workstream_id == uid)
q = q.where(TokenEvent.workplan_id == uid)
elif scope == "repo":
try:
uid = uuid.UUID(id)
@@ -297,7 +297,7 @@ async def get_tokens_by_repo(
Resolution order for each event:
1. token_events.repo_id (direct)
2. → workplans.repo_id (via workplan_id)
3. → task.workstream_id → workstreams.repo_id (via task_id)
3. → task.workplan_id → workplans.repo_id (via task_id)
Only events that resolve to a repo are included.
"""
@@ -314,8 +314,8 @@ async def get_tokens_by_repo(
)
events = list(events_result.scalars().all())
ws_result = await session.execute(select(Workstream))
ws_map: dict[uuid.UUID, Workstream] = {w.id: w for w in ws_result.scalars().all()}
ws_result = await session.execute(select(Workplan))
ws_map: dict[uuid.UUID, Workplan] = {w.id: w for w in ws_result.scalars().all()}
task_result = await session.execute(select(Task))
task_map: dict[uuid.UUID, Task] = {t.id: t for t in task_result.scalars().all()}
@@ -326,9 +326,9 @@ async def get_tokens_by_repo(
def resolve_repo_id(e: TokenEvent) -> uuid.UUID | None:
if e.repo_id:
return e.repo_id
ws_id = e.workstream_id
ws_id = e.workplan_id
if not ws_id and e.task_id and e.task_id in task_map:
ws_id = task_map[e.task_id].workstream_id
ws_id = task_map[e.task_id].workplan_id
if ws_id and ws_id in ws_map:
return ws_map[ws_id].repo_id
return None
@@ -391,8 +391,8 @@ async def get_token_aggregate(
)
events = list(events_result.scalars().all())
ws_result = await session.execute(select(Workstream))
ws_map: dict[uuid.UUID, Workstream] = {w.id: w for w in ws_result.scalars().all()}
ws_result = await session.execute(select(Workplan))
ws_map: dict[uuid.UUID, Workplan] = {w.id: w for w in ws_result.scalars().all()}
task_result = await session.execute(select(Task))
task_map: dict[uuid.UUID, Task] = {t.id: t for t in task_result.scalars().all()}
@@ -403,9 +403,9 @@ async def get_token_aggregate(
def resolve_repo_id(e: TokenEvent) -> uuid.UUID | None:
if e.repo_id:
return e.repo_id
ws_id = e.workstream_id
ws_id = e.workplan_id
if not ws_id and e.task_id and e.task_id in task_map:
ws_id = task_map[e.task_id].workstream_id
ws_id = task_map[e.task_id].workplan_id
if ws_id and ws_id in ws_map:
return ws_map[ws_id].repo_id
return None
@@ -458,7 +458,7 @@ async def get_token_aggregate(
repo = repo_map.get(rid) if rid else None
add(by_repo, str(rid) if rid else None, repo.slug if repo else None, e)
ws_id = e.workstream_id or (task_map[e.task_id].workstream_id if e.task_id in task_map else None)
ws_id = e.workplan_id or (task_map[e.task_id].workplan_id if e.task_id in task_map else None)
ws = ws_map.get(ws_id) if ws_id else None
add(by_workstream, str(ws_id) if ws_id else None, ws.title if ws else None, e)
@@ -520,7 +520,7 @@ async def get_token_quality(
source_counts[(e.measurement_kind, e.source_provider, e.source_id)] += 1
if e.source_provider == "task_fallback" or e.note == "heuristic":
fallback_count += 1
if e.measurement_kind == "measured" and not (e.repo_id or e.workstream_id or e.task_id):
if e.measurement_kind == "measured" and not (e.repo_id or e.workplan_id or e.task_id):
unattributed_measured_count += 1
if e.measurement_kind == "measured" and not e.source_id:
missing_provenance_count += 1