Complete workplan state model cleanup

2026-05-18 01:31:36 +02:00
parent 98b2cb6484
commit d6522a9a40
42 changed files with 789 additions and 310 deletions
--- a/api/flow_defs.py
+++ b/api/flow_defs.py
@@ -51,7 +51,10 @@ def _dependencies_any_incomplete(
    obj: dict[str, Any],
    values: list[Any],
 ) -> bool:
-    return bool(values) and any(value != assertion.value for value in values)
+    expected = assertion.value
+    if isinstance(expected, list):
+        return bool(values) and any(value not in expected for value in values)
+    return bool(values) and any(value != expected for value in values)


 def assertion_result_to_dict(result: AssertionResult) -> dict[str, Any]:
--- a/api/routers/flows.py
+++ b/api/routers/flows.py
@@ -19,6 +19,7 @@ from api.models.contribution import Contribution
 from api.models.task import Task
 from api.models.workstream import Workstream
 from api.models.workstream_dependency import WorkstreamDependency
+from api.workplan_status import normalize_workstream_status

 router = APIRouter(prefix="/flows", tags=["flows"])

@@ -104,11 +105,12 @@ async def _flow_object(
 ) -> dict[str, Any]:
    entity = await _entity(entity_type, entity_id, session)
    status = _value(entity.status)
+    current_status = normalize_workstream_status(status) if entity_type == "workstream" else status
    obj: dict[str, Any] = {
        "id": str(entity.id),
-        "status": status,
-        "workstation": status,
-        "previous_workstation": status,
+        "status": current_status,
+        "workstation": current_status,
+        "previous_workstation": current_status,
    }

    if entity_type == "workstream":
@@ -127,7 +129,7 @@ async def _flow_object(
                select(Workstream).where(Workstream.id.in_(dependency_ids))
            )).scalars().all())
            dependency_workstations = [
-                {"id": str(ws.id), "workstation": ws.status}
+                {"id": str(ws.id), "workstation": normalize_workstream_status(ws.status)}
                for ws in dep_ws
            ]
        obj.update({
--- a/api/routers/state.py
+++ b/api/routers/state.py
@@ -38,6 +38,11 @@ from api.schemas.task import TaskRead
 from api.schemas.topic import TopicRead, TopicWithWorkstreams
 from api.schemas.workstream import WorkstreamRead, WorkstreamWithTaskCounts, WorkstreamWithDeps
 from api.schemas.workstream_dependency import WorkstreamDepStub
+from api.workplan_status import (
+    CLOSED_WORKSTREAM_STATUSES,
+    OPEN_WORKSTREAM_STATUSES,
+    normalize_workstream_status,
+)
 from task_flow_engine import FlowEngine

 router = APIRouter(prefix="/state", tags=["state"])
@@ -119,7 +124,7 @@ async def get_summary(
    open_ws_rows = await session.execute(
        select(Workstream)
        .options(noload("*"))
-        .where(Workstream.status.in_(["active", "blocked"]))
+        .where(Workstream.status.in_(OPEN_WORKSTREAM_STATUSES))
        .order_by(Workstream.due_date.asc().nullslast(), Workstream.created_at)
    )
    open_ws = list(open_ws_rows.scalars().all())
@@ -211,7 +216,7 @@ async def get_summary(
            "workstation": w.status,
            "tasks": [{"status": status} for status in task_statuses_per_ws.get(w.id, [])],
            "dependencies": [
-                {"workstation": ws_lookup[d.to_workstream_id].status}
+                {"workstation": normalize_workstream_status(ws_lookup[d.to_workstream_id].status)}
                for d in dep_rows
                if d.from_workstream_id == w.id and d.to_workstream_id and d.to_workstream_id in ws_lookup
            ],
@@ -244,9 +249,16 @@ async def get_summary(
            total=sum(topic_counts.values()),
        ),
        workstreams=WorkstreamTotals(
+            proposed=ws_counts.get("proposed", 0),
+            ready=ws_counts.get("ready", 0) + ws_counts.get("todo", 0),
            active=sum(1 for status in effective_status.values() if status == "active"),
            blocked=sum(1 for status in effective_status.values() if status == "blocked"),
-            completed=ws_counts.get("completed", 0),
+            backlog=ws_counts.get("backlog", 0),
+            finished=(
+                ws_counts.get("finished", 0)
+                + ws_counts.get("completed", 0)
+                + ws_counts.get("accepted", 0)
+            ),
            archived=ws_counts.get("archived", 0),
            total=sum(ws_counts.values()),
        ),
@@ -366,7 +378,7 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]:
    for domain_id, cnt in await session.execute(
        select(Topic.domain_id, func.count(Workstream.id))
        .join(Workstream, Workstream.topic_id == Topic.id)
-        .where(Workstream.status == "active")
+        .where(Workstream.status.in_(["active", "blocked"]))
        .group_by(Topic.domain_id)
    ):
        ws_per_domain[domain_id] = cnt
@@ -405,7 +417,7 @@ async def get_deps(session: AsyncSession = Depends(get_session)) -> list[Workstr
    open_ws_rows = await session.execute(
        select(Workstream)
        .options(noload("*"))
-        .where(Workstream.status.in_(["active", "blocked"]))
+        .where(Workstream.status.in_(OPEN_WORKSTREAM_STATUSES))
        .order_by(Workstream.due_date.asc().nullslast(), Workstream.created_at)
    )
    open_ws = list(open_ws_rows.scalars().all())
@@ -488,7 +500,7 @@ async def _derive_next_steps(session: AsyncSession) -> list[NextStep]:

    Two signal sources:
    1. Recently resolved decisions (last 7 days) → first open task in same workstream
-    2. Workstreams whose every dependency is now completed → first todo task in that workstream
+    2. Workstreams whose every dependency is now finished -> first todo task in that workstream
    """
    steps: list[NextStep] = []
    seen_task_ids: set = set()
@@ -575,8 +587,11 @@ async def _derive_next_steps(session: AsyncSession) -> list[NextStep]:
    ready_from_ws_ids = [
        from_ws_id
        for from_ws_id, to_ws_ids in dep_map.items()
-        if ws_info.get(from_ws_id, {}).get("status") in ("active", "blocked")
-        and all(ws_info.get(to_id, {}).get("status") == "completed" for to_id in to_ws_ids)
+        if normalize_workstream_status(ws_info.get(from_ws_id, {}).get("status")) in OPEN_WORKSTREAM_STATUSES
+        and all(
+            normalize_workstream_status(ws_info.get(to_id, {}).get("status")) in CLOSED_WORKSTREAM_STATUSES
+            for to_id in to_ws_ids
+        )
    ]

    todo_by_ws: dict = {}
@@ -613,7 +628,7 @@ async def _derive_next_steps(session: AsyncSession) -> list[NextStep]:
            task_id=task.id,
            task_title=task.title,
            message=(
-                f"All dependencies of '{from_ws['title']}' are completed ({blocker_slugs}) → "
+                f"All dependencies of '{from_ws['title']}' are finished ({blocker_slugs}) -> "
                f"'{task.title}' is ready to start"
            ),
        ))
@@ -650,7 +665,7 @@ async def get_next_steps(session: AsyncSession = Depends(get_session)) -> list[N

    Returns suggestions based on:
    - Recently resolved decisions → first open task in the same workstream
-    - Workstreams whose every dependency workstream is now completed → first todo task
+    - Workstreams whose every dependency workstream is now finished -> first todo task
    """
    return await _derive_next_steps(session)

--- a/api/routers/workstreams.py
+++ b/api/routers/workstreams.py
@@ -5,6 +5,7 @@ import time
 from pathlib import Path
 from typing import Any

+import yaml
 from fastapi import APIRouter, Depends, HTTPException, Query, status
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
@@ -16,9 +17,13 @@ from api.models.workstream import Workstream
 from api.schemas.workstream import (
    WorkstreamCreate,
    WorkstreamRead,
-    WorkstreamStatus,
    WorkstreamUpdate,
 )
+from api.workplan_status import (
+    is_supported_workstream_status,
+    normalize_workstream_status,
+    ready_review_status,
+)

 router = APIRouter(prefix="/workstreams", tags=["workstreams"])

@@ -53,17 +58,10 @@ def _frontmatter(path: Path) -> dict[str, Any]:
    if end == -1:
        return {}

-    data: dict[str, Any] = {}
-    for raw_line in text[4:end].splitlines():
-        line = raw_line.strip()
-        if not line or line.startswith("#") or ":" not in line:
-            continue
-        key, value = line.split(":", 1)
-        value = value.strip()
-        if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
-            value = value[1:-1]
-        data[key.strip()] = value
-    return data
+    try:
+        return yaml.safe_load(text[4:end].strip()) or {}
+    except yaml.YAMLError:
+        return {}


@router.get("/", response_model=list[WorkstreamRead])
@@ -71,7 +69,7 @@ async def list_workstreams(
    topic_id: uuid.UUID | None = None,
    repo_id: uuid.UUID | None = None,
    repo_goal_id: uuid.UUID | None = None,
-    status: WorkstreamStatus | None = None,
+    status: str | None = None,
    owner: str | None = None,
    slug: str | None = None,
    session: AsyncSession = Depends(get_session),
@@ -84,7 +82,10 @@ async def list_workstreams(
    if repo_goal_id:
        q = q.where(Workstream.repo_goal_id == repo_goal_id)
    if status:
-        q = q.where(Workstream.status == status)
+        normalised_status = normalize_workstream_status(status)
+        if not is_supported_workstream_status(status):
+            raise HTTPException(status_code=422, detail=f"Unsupported workstream status '{status}'")
+        q = q.where(Workstream.status == normalised_status)
    if owner:
        q = q.where(Workstream.owner == owner)
    if slug:
@@ -127,11 +128,24 @@ async def workplan_index(
                workstream_id = data.get("state_hub_workstream_id")
                if not workstream_id:
                    continue
+                file_status = normalize_workstream_status(data.get("status", ""))
+                review = (
+                    ready_review_status(
+                        root,
+                        data.get("reviewed_against_commit"),
+                        data.get("context_paths"),
+                    )
+                    if file_status == "ready"
+                    else None
+                )
                index[str(workstream_id)] = {
                    "filename": path.name,
                    "relative_path": str(path.relative_to(root)),
                    "repo_slug": repo.slug,
                    "archived": archived,
+                    "status": file_status or None,
+                    "needs_review": bool(review and review.needs_review),
+                    "health_labels": ["needs_review"] if review and review.needs_review else [],
                }
    _INDEX_CACHE = {"workstreams": index}
    _INDEX_CACHE_AT = time.monotonic()
@@ -176,7 +190,7 @@ async def update_workstream(
    await session.commit()
    await session.refresh(ws)

-    if prev_status != "completed" and ws.status == "completed":
+    if normalize_workstream_status(prev_status) != "finished" and ws.status == "finished":
        subject = "org.statehub.workstream.completed"
        envelope = EventEnvelope.new(
            subject,
--- a/api/schemas/state.py
+++ b/api/schemas/state.py
@@ -19,9 +19,12 @@ class TopicTotals(BaseModel):


 class WorkstreamTotals(BaseModel):
+    proposed: int = 0
+    ready: int = 0
    active: int = 0
    blocked: int = 0
-    completed: int = 0
+    backlog: int = 0
+    finished: int = 0
    archived: int = 0
    total: int = 0

--- a/api/schemas/workstream.py
+++ b/api/schemas/workstream.py
@@ -2,14 +2,30 @@ import uuid
 from datetime import date, datetime
 from typing import Literal

-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, field_validator

 from api.schemas.workstream_dependency import WorkstreamDepStub
+from api.workplan_status import normalize_workstream_status

-WorkstreamStatus = Literal["todo", "active", "blocked", "completed", "archived"]
+WorkstreamStatus = Literal[
+    "proposed",
+    "ready",
+    "active",
+    "blocked",
+    "backlog",
+    "finished",
+    "archived",
+]


-class WorkstreamCreate(BaseModel):
+class WorkstreamStatusMixin(BaseModel):  # shared "status" normaliser for the workstream schemas
+    @field_validator("status", mode="before", check_fields=False)
+    @classmethod
+    def _normalise_status(cls, value):  # runs before Literal validation so legacy aliases are accepted
+        return None if value is None else normalize_workstream_status(value)  # None would become "" and fail
+
+
+class WorkstreamCreate(WorkstreamStatusMixin):
    topic_id: uuid.UUID
    slug: str
    title: str
@@ -23,7 +39,7 @@ class WorkstreamCreate(BaseModel):
    repo_goal_id: uuid.UUID | None = None


-class WorkstreamUpdate(BaseModel):
+class WorkstreamUpdate(WorkstreamStatusMixin):
    title: str | None = None
    description: str | None = None
    status: WorkstreamStatus | None = None
@@ -35,7 +51,7 @@ class WorkstreamUpdate(BaseModel):
    repo_goal_id: uuid.UUID | None = None


-class WorkstreamRead(BaseModel):
+class WorkstreamRead(WorkstreamStatusMixin):
    model_config = ConfigDict(from_attributes=True)
    id: uuid.UUID
    topic_id: uuid.UUID
--- a/api/workplan_status.py
+++ b/api/workplan_status.py
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+import fnmatch
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+CANONICAL_WORKSTREAM_STATUSES: tuple[str, ...] = (  # lifecycle statuses in their canonical spelling
+    "proposed",
+    "ready",
+    "active",
+    "blocked",
+    "backlog",
+    "finished",
+    "archived",
+)
+
+LEGACY_WORKSTREAM_STATUS_ALIASES: dict[str, str] = {  # legacy spelling -> canonical status
+    "todo": "ready",
+    "done": "finished",
+    "completed": "finished",
+    "accepted": "finished",
+}
+
+SUPPORTED_WORKSTREAM_STATUSES: tuple[str, ...] = (  # canonical values plus every legacy alias key
+    *CANONICAL_WORKSTREAM_STATUSES,
+    *LEGACY_WORKSTREAM_STATUS_ALIASES.keys(),
+)
+
+OPEN_WORKSTREAM_STATUSES: tuple[str, ...] = ("ready", "active", "blocked")  # canonical names only
+CURRENT_WORKSTREAM_STATUSES: tuple[str, ...] = ("active", "blocked")  # subset of the open statuses
+CLOSED_WORKSTREAM_STATUSES: tuple[str, ...] = ("finished", "archived")  # canonical names only
+PLANNING_WORKSTREAM_STATUSES: tuple[str, ...] = ("proposed", "ready", "backlog")  # note: "ready" is also open
+
+
+@dataclass(frozen=True)
+class ReadyReviewStatus:  # immutable result returned by ready_review_status()
+    needs_review: bool  # True when the workplan should be re-reviewed
+    reason: str = ""  # human-readable explanation; ready_review_status leaves it empty when no review is needed
+    changed_paths: tuple[str, ...] = ()  # changed files that matched the supplied context paths
+
+
+def normalize_workstream_status(status: Any, *, has_started: bool | None = None) -> str:
+    """Map a stored or legacy status to its canonical name; unknown values come back stripped and lower-cased."""
+    value = _status_value(status)
+    if value == "todo" and has_started:  # legacy "todo" with has_started truthy maps to "active", not "ready"
+        return "active"
+    return LEGACY_WORKSTREAM_STATUS_ALIASES.get(value, value)
+
+
+def is_canonical_workstream_status(status: Any) -> bool:  # True only for canonical spellings; aliases excluded
+    return _status_value(status) in CANONICAL_WORKSTREAM_STATUSES
+
+
+def is_supported_workstream_status(status: Any) -> bool:  # True for canonical statuses and legacy aliases alike
+    return _status_value(status) in SUPPORTED_WORKSTREAM_STATUSES
+
+
+def workstream_has_started(task_statuses: list[Any] | tuple[Any, ...]) -> bool:  # False for an empty sequence
+    return any(_status_value(status) not in {"", "todo"} for status in task_statuses)  # anything but empty/"todo"
+
+
+def ready_review_status(
+    repo_dir: str | Path,
+    reviewed_against_commit: Any,
+    context_paths: Any = None,
+) -> ReadyReviewStatus:
+    """Return whether a ready workplan needs review against current repo state.
+
+    An empty review commit never flags; context paths narrow which changes matter.
+    Unreadable HEAD, unknown commit, or failed diff conservatively needs review.
+    """
+    reviewed = str(reviewed_against_commit or "").strip().strip("\"'")
+    if not reviewed:
+        return ReadyReviewStatus(False)
+
+    repo = Path(repo_dir)
+    head = _git_output(repo, ["rev-parse", "HEAD"])
+    if not head:
+        return ReadyReviewStatus(True, "could not determine repository HEAD")
+    if reviewed == head or (len(reviewed) >= 7 and head.startswith(reviewed.lower())):  # abbreviated SHA of HEAD
+        return ReadyReviewStatus(False)
+
+    if not _git_commit_exists(repo, reviewed):
+        return ReadyReviewStatus(True, f"review commit {reviewed[:12]} is not available")
+
+    patterns = _as_list(context_paths)
+    if not patterns:
+        return ReadyReviewStatus(
+            True,
+            f"reviewed against {reviewed[:12]}, current HEAD is {head[:12]}",
+        )
+
+    changed = _changed_paths_since(repo, reviewed)
+    if changed is None:
+        return ReadyReviewStatus(True, "could not compare reviewed commit with HEAD")
+
+    matching = tuple(path for path in changed if _matches_any_context(path, patterns))
+    if not matching:
+        return ReadyReviewStatus(False)
+
+    return ReadyReviewStatus(
+        True,
+        f"{len(matching)} context path(s) changed since {reviewed[:12]}",
+        matching,
+    )
+
+
+def _status_value(status: Any) -> str:  # stripped, lower-cased text of a status; None/empty gives ""
+    if hasattr(status, "value"):  # covers str-mixin enums too, whose str() would be "Class.NAME"
+        status = status.value
+    return str(status or "").strip().lower()
+
+
+def _as_list(value: Any) -> list[str]:  # coerce any value into a list of non-empty, "/"-separated strings
+    if value is None:
+        return []
+    if isinstance(value, (list, tuple, set)):
+        return [str(item).strip().replace("\\", "/") for item in value if str(item).strip()]
+    if isinstance(value, str):  # a plain string is split on commas
+        return [item.strip().replace("\\", "/") for item in value.split(",") if item.strip()]
+    return [str(value).strip().replace("\\", "/")]  # any other scalar becomes a single-item list
+
+
+def _git_output(repo: Path, args: list[str]) -> str | None:  # stripped stdout of `git -C <repo> <args>`, else None
+    try:
+        return subprocess.check_output(
+            ["git", "-C", str(repo), *args],
+            stderr=subprocess.DEVNULL,
+            text=True,
+        ).strip()
+    except (subprocess.CalledProcessError, FileNotFoundError, OSError):  # non-zero exit or git not runnable
+        return None
+
+
+def _git_commit_exists(repo: Path, commit: str) -> bool:  # True when `git cat-file -e` exits successfully
+    try:
+        subprocess.run(
+            ["git", "-C", str(repo), "cat-file", "-e", f"{commit}^{{commit}}"],  # renders as <commit>^{commit}
+            check=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        return True
+    except (subprocess.CalledProcessError, FileNotFoundError, OSError):  # non-zero exit or git not runnable
+        return False
+
+
+def _changed_paths_since(repo: Path, commit: str) -> tuple[str, ...] | None:  # files changed in commit..HEAD
+    output = _git_output(repo, ["diff", "--name-only", "-z", f"{commit}..HEAD", "--"])  # -z: NUL-separated, unquoted
+    if output is None:  # git failed; the caller treats this as "needs review"
+        return None
+    return tuple(path.strip().replace("\\", "/") for path in output.split("\0") if path.strip())
+
+
+def _matches_any_context(path: str, patterns: list[str]) -> bool:  # True if path equals, lies under, or globs a pattern
+    norm_path = path.strip().replace("\\", "/").removeprefix("./").lstrip("/")  # lstrip("./") would eat ".github"
+    for raw_pattern in patterns:
+        pattern = raw_pattern.strip().replace("\\", "/").removeprefix("./").lstrip("/")  # same rule as the path
+        if not pattern:
+            continue
+        if any(char in pattern for char in "*?[]"):  # glob pattern: match the whole path with fnmatch
+            if fnmatch.fnmatch(norm_path, pattern):
+                return True
+        elif norm_path == pattern or norm_path.startswith(f"{pattern.rstrip('/')}/"):  # exact file or directory prefix
+            return True
+    return False