Complete workplan state model cleanup

This commit is contained in:
2026-05-18 01:31:36 +02:00
parent 98b2cb6484
commit d6522a9a40
42 changed files with 789 additions and 310 deletions

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
cleanup_stale_tasks.py — cancel tasks that are still open in completed/archived workstreams.
cleanup_stale_tasks.py — cancel tasks that are still open in finished/archived workstreams.
Run manually: python3 scripts/cleanup_stale_tasks.py
Run via make: make cleanup-stale
@@ -22,6 +22,8 @@ from datetime import datetime, timezone
# Make the api package importable when running as `python scripts/cleanup_stale_tasks.py`
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from api.workplan_status import CLOSED_WORKSTREAM_STATUSES, normalize_workstream_status
try:
from api.events import EventEnvelope, publish_event, shutdown_publisher
except Exception: # pragma: no cover — event publishing is optional
@@ -31,7 +33,7 @@ except Exception: # pragma: no cover — event publishing is optional
API = "http://127.0.0.1:8000"
STALE_STATUSES = {"todo", "in_progress", "blocked"}
CLOSED_WS_STATUS = {"completed", "archived"}
CLOSED_WS_STATUS = set(CLOSED_WORKSTREAM_STATUSES)
def get(path: str) -> list | dict:
@@ -81,7 +83,11 @@ def main() -> int:
print("[cleanup-stale] Start the API with: cd ~/state-hub && make api", file=sys.stderr)
return 1
closed_ws = {w["id"]: w for w in workstreams if w["status"] in CLOSED_WS_STATUS}
closed_ws = {
w["id"]: w
for w in workstreams
if normalize_workstream_status(w["status"]) in CLOSED_WS_STATUS
}
stale = [
t for t in tasks
@@ -93,7 +99,7 @@ def main() -> int:
print("[cleanup-stale] Nothing to cancel — all open tasks belong to active workstreams.")
return 0
print(f"[cleanup-stale] Found {len(stale)} stale task(s) in completed/archived workstreams:")
print(f"[cleanup-stale] Found {len(stale)} stale task(s) in finished/archived workstreams:")
cancelled = []
errors = []
@@ -150,7 +156,7 @@ def main() -> int:
summary = (
f"Stale-task cleanup: cancelled {len(cancelled)} task(s) "
f"across {len(by_ws)} completed workstream(s)"
f"across {len(by_ws)} finished workstream(s)"
)
detail = {
"cancelled_count": len(cancelled),

View File

@@ -12,7 +12,7 @@ Checks:
C-05 workstream-title-drift WARN Yes File title != DB title (file wins)
C-06 workstream-unlinked WARN Yes Workplan has no state_hub_workstream_id
C-07 orphan-db-active FAIL No Active DB workstream, no backing file
C-08 orphan-db-completed INFO No Completed/archived DB workstream, no file
C-08 orphan-db-closed INFO No Finished/archived DB workstream, no file
C-09 workstream-repo-mismatch FAIL Yes DB workstream repo_id != file location
C-10 task-status-drift WARN Yes Task status differs between file and DB
C-11 task-unlinked WARN Yes Task block has no state_hub_task_id
@@ -51,6 +51,20 @@ from datetime import datetime
from pathlib import Path
from typing import Any
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
from api.workplan_status import ( # noqa: E402
CANONICAL_WORKSTREAM_STATUSES,
CLOSED_WORKSTREAM_STATUSES,
LEGACY_WORKSTREAM_STATUS_ALIASES,
OPEN_WORKSTREAM_STATUSES,
SUPPORTED_WORKSTREAM_STATUSES,
normalize_workstream_status as _normalize_workstream_status,
ready_review_status,
)
try:
import yaml as _yaml
_HAS_YAML = True
@@ -71,19 +85,15 @@ except ImportError:
_TASK_BLOCK_RE = re.compile(r"```task\s*\n(.*?)\n```", re.DOTALL)
_HEADING_RE = re.compile(r"^#{1,4}\s+(.+?)$", re.MULTILINE)
_ARCHIVED_WP_RE = re.compile(r"^\d{6}-(.+\.md)$")
VALID_WP_STATUSES = {"active", "completed", "archived"}
VALID_WP_STATUSES = set(CANONICAL_WORKSTREAM_STATUSES)
SUPPORTED_WP_STATUSES = set(SUPPORTED_WORKSTREAM_STATUSES)
VALID_TASK_STATUSES = {"todo", "in_progress", "blocked", "done", "cancelled"}
VALID_TASK_PRIORITIES = {"low", "medium", "high", "critical"}
VALID_DEP_RELATIONSHIPS = {"blocks", "starts_after", "informs", "soft_dependency"}
DEFAULT_REMOTE_ALL_MAX_SECONDS = int(os.environ.get("CONSISTENCY_REMOTE_ALL_MAX_SECONDS", "300"))
# Workplan files use task-style vocabulary ("done"); the DB workstream API uses
# "completed". This map translates file values to DB values before comparison
# and before PATCHing, so "done" vs "completed" is never flagged as C-04 drift.
FILE_TO_DB_WORKSTREAM_STATUS: dict[str, str] = {
"done": "completed",
"todo": "active", # workplan not yet started → active workstream in DB
}
# Legacy file/API aliases translated before comparison and PATCHing.
FILE_TO_DB_WORKSTREAM_STATUS: dict[str, str] = dict(LEGACY_WORKSTREAM_STATUS_ALIASES)
# Ordinal ranking for task statuses used by the no-regress rule (T01/C-15).
# blocked and in_progress share rank 1 — both are "in flight".
@@ -96,9 +106,9 @@ STATUS_ORDER: dict[str, int] = {
}
def normalise_workstream_status(status: str) -> str:
def normalise_workstream_status(status: str, *, has_started: bool | None = None) -> str:
"""Translate a workplan file status value to its DB-canonical equivalent."""
return FILE_TO_DB_WORKSTREAM_STATUS.get(status, status)
return _normalize_workstream_status(status, has_started=has_started)
def canonical_workplan_filename(path: Path) -> str:
@@ -593,10 +603,11 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
file_title = str(meta.get("title", "")).strip()
file_domain = str(meta.get("domain", "")).strip()
if archived_file and normalise_workstream_status(file_status) == "active":
normalised_file_status = normalise_workstream_status(file_status)
if archived_file and normalised_file_status not in CLOSED_WORKSTREAM_STATUSES:
report.add(
severity="FAIL", check_id="C-18",
message="Archived workplan file has active/todo status",
message="Archived workplan file has an open or planning status",
file_path=fname,
file_value=file_status,
fixable=False,
@@ -652,10 +663,10 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
# Continue to check drift even with mismatched repo
# C-04: status drift — normalise both file and DB values before comparing so that
# "done" (file) vs "completed" (DB) is not treated as drift.
# legacy file/API aliases are not treated as drift.
db_status = ws.get("status", "")
normalised_file_status = normalise_workstream_status(file_status)
if file_status and db_status and normalised_file_status != db_status:
normalised_db_status = normalise_workstream_status(db_status)
if file_status and db_status and normalised_file_status != normalised_db_status:
report.add(
severity="WARN", check_id="C-04",
message=(
@@ -674,6 +685,28 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
},
)
if normalised_file_status == "ready":
review = ready_review_status(
repo_dir,
meta.get("reviewed_against_commit"),
meta.get("context_paths"),
)
if review.needs_review:
detail = f"Ready workplan may be stale: {review.reason}"
if review.changed_paths:
preview = ", ".join(review.changed_paths[:5])
extra = "" if len(review.changed_paths) <= 5 else ", ..."
detail = f"{detail}; changed paths: {preview}{extra}"
report.add(
severity="WARN",
check_id="C-21",
message=detail,
file_path=fname,
file_value=file_status,
db_value="needs_review",
fixable=False,
)
# C-05: title drift
db_title = ws.get("title", "")
if file_title and db_title and file_title != db_title:
@@ -888,7 +921,7 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
# C-12: DB tasks with no file backing
if isinstance(db_tasks, list):
ws_status = ws.get("status", "")
ws_finished = ws_status in ("completed", "archived")
ws_finished = normalise_workstream_status(ws_status) in CLOSED_WORKSTREAM_STATUSES
for db_t in db_tasks:
if db_t["id"] not in file_task_sh_ids:
db_t_status = db_t.get("status", "")
@@ -912,7 +945,7 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
# C-13: all DB tasks done but workstream still active — worker forgot to close
db_status = ws.get("status", "")
if db_status == "active" and isinstance(db_tasks, list) and db_tasks:
if normalise_workstream_status(db_status) == "active" and isinstance(db_tasks, list) and db_tasks:
non_terminal = [
t for t in db_tasks
if t.get("status") not in ("done", "cancelled")
@@ -932,7 +965,7 @@ def check_repo(api_base: str, repo_slug: str, repo_path_override: str | None = N
_fix_context={
"ws_id": ws_id,
"field": "status",
"value": "completed",
"value": "finished",
},
)
@@ -963,26 +996,27 @@ def _check_orphan_db(
for ws in all_ws:
ws_id = ws["id"]
ws_status = ws.get("status", "")
if ws_status == "active" and ws_id in active_file_ws_ids:
normalised_status = normalise_workstream_status(ws_status)
if normalised_status not in CLOSED_WORKSTREAM_STATUSES and ws_id in active_file_ws_ids:
continue
if ws_status in ("completed", "archived") and ws_id in file_ws_ids:
if normalised_status in CLOSED_WORKSTREAM_STATUSES and ws_id in file_ws_ids:
continue
ws_slug = ws.get("slug", "")
if ws_status == "active":
if normalised_status not in CLOSED_WORKSTREAM_STATUSES:
report.add(
severity="FAIL", check_id="C-07",
message=(
f"Active DB workstream '{ws_slug}' (id={ws_id[:8]}…) "
f"Non-closed DB workstream '{ws_slug}' (id={ws_id[:8]}…) "
f"has no backing workplan file — ADR-001 violation"
),
db_id=ws_id,
fixable=False,
)
elif ws_status in ("completed", "archived"):
elif normalised_status in CLOSED_WORKSTREAM_STATUSES:
report.add(
severity="INFO", check_id="C-08",
message=(
f"Completed/archived DB workstream '{ws_slug}' "
f"Closed DB workstream '{ws_slug}' "
f"(id={ws_id[:8]}…, status={ws_status}) has no backing workplan file"
),
db_id=ws_id,
@@ -1019,9 +1053,11 @@ def _check_ghost_duplicates(
topic_ids.add(ws["topic_id"])
for topic_id in topic_ids:
topic_ws = _api_get(api_base, "/workstreams", {"topic_id": topic_id, "status": "active"})
if not isinstance(topic_ws, list):
continue
topic_ws: list[dict] = []
for status in OPEN_WORKSTREAM_STATUSES:
status_rows = _api_get(api_base, "/workstreams", {"topic_id": topic_id, "status": status})
if isinstance(status_rows, list):
topic_ws.extend(status_rows)
for ws in topic_ws:
ws_id = ws["id"]
if ws_id in file_ws_ids:
@@ -1166,9 +1202,13 @@ def _write_custodian_brief(api_base: str, repo_slug: str, repo_path: str) -> boo
domain_slug: str = ""
# Resolve domain slug: prefer open workstreams, fall back to any workstream
# so that a fully-completed repo doesn't degrade to "(unknown)".
workstreams = _api_get(api_base, "/workstreams", {"repo_id": repo_id, "status": "active"}) or []
_ws_for_domain = workstreams if (isinstance(workstreams, list) and workstreams) else []
# so that a fully-finished repo doesn't degrade to "(unknown)".
workstreams: list[dict] = []
for status in OPEN_WORKSTREAM_STATUSES:
rows = _api_get(api_base, "/workstreams", {"repo_id": repo_id, "status": status}) or []
if isinstance(rows, list):
workstreams.extend(rows)
_ws_for_domain = workstreams if workstreams else []
if not _ws_for_domain:
all_ws = _api_get(api_base, "/workstreams", {"repo_id": repo_id}) or []
_ws_for_domain = all_ws if isinstance(all_ws, list) else []
@@ -1379,7 +1419,8 @@ def fix_repo(
wp_id = str(meta.get("id", "")).strip()
title = str(meta.get("title", "")).strip()
status = str(meta.get("status", "active")).strip()
if status not in ("active", "completed", "archived"):
status = normalise_workstream_status(status)
if status not in VALID_WP_STATUSES:
status = "active"
# Find topic_id for this domain
@@ -1500,7 +1541,7 @@ def fix_repo(
t_id = str(task.get("id", "")).strip()
# Skip creating tasks for finished workstreams — the workstream is
# finished/archived so unlinked tasks are stale file artefacts, not gaps.
if ws_status in ("completed", "archived"):
if normalise_workstream_status(ws_status) in CLOSED_WORKSTREAM_STATUSES:
report.fixes_applied.append(
f"C-11 skipped: task '{t_id}' in {ws_status} workstream — not created"
)
@@ -1596,7 +1637,7 @@ def fix_repo(
# Check IDs that are known-background noise in multi-machine setups:
# C-08 = completed/archived DB workstream with no file (pre-ADR-001 legacy)
# C-08 = finished/archived DB workstream with no file (pre-ADR-001 legacy)
# These alone do not warrant a pull+fix cycle.
_BACKGROUND_CHECKS: frozenset[str] = frozenset({"C-08"})
@@ -1707,7 +1748,7 @@ def archive_closed_workplans(
) -> list[str]:
"""Move closed root workplans into workplans/archived/ with YYMMDD prefix.
Only root-level files whose frontmatter status normalises to completed or
Only root-level files whose frontmatter status normalises to finished or
archived are moved. Files with any open task blocks are left in place.
"""
repo_dir = Path(repo_path)
@@ -1732,7 +1773,7 @@ def archive_closed_workplans(
if wanted not in {str(meta.get("id", "")), wp_file.stem, wp_file.name}:
continue
status = normalise_workstream_status(str(meta.get("status", "")).strip())
if status not in ("completed", "archived"):
if status not in CLOSED_WORKSTREAM_STATUSES:
continue
tasks = get_tasks_from_workplan(meta, body)
open_tasks = [

View File

@@ -82,7 +82,7 @@ curl -s -X PATCH "http://127.0.0.1:8000/tasks/<task_id>" \
**Start:**
1. `cat .custodian-brief.md` — domain goal and open workstreams (offline-safe)
2. Check inbox: `GET /messages/?to_agent={REPO_SLUG}&unread_only=true`; mark read
3. Scan workplans: `ls workplans/` — note `status: active` files and open tasks
3. Scan workplans: `ls workplans/` — note `status: ready`, `active`, or `blocked` files and open tasks
4. Check blocked tasks: `GET /tasks/?needs_human=true`
**During work:**
@@ -108,7 +108,7 @@ read/cache/index layer that rebuilds from files.
**File location:** `workplans/{WP_PREFIX}-NNNN-<slug>.md`
**Archived location:** completed workplans may move to
**Archived location:** finished workplans may move to
`workplans/archived/YYMMDD-{WP_PREFIX}-NNNN-<slug>.md`. The `YYMMDD` prefix is
the completion/archive date; the frontmatter `id` does not change.
@@ -126,7 +126,7 @@ type: workplan
title: "..."
domain: {DOMAIN}
repo: {REPO_SLUG}
status: active | done
status: proposed | ready | active | blocked | backlog | finished | archived
owner: codex
topic_slug: ...
created: "YYYY-MM-DD"
@@ -135,6 +135,10 @@ state_hub_workstream_id: "<uuid>" # written by fix-consistency — do not edit
---
```
Use `proposed` for a new draft, `ready` after review against current repo
state, and `finished` after implementation. `stalled` and `needs_review` are
derived health labels, not frontmatter statuses.
**Task block format** (one per `##` section):
```

View File

@@ -25,7 +25,8 @@ requests before proceeding.
```bash
ls workplans/
```
For each file with `status: active`, note pending `todo`/`in_progress` tasks.
For each file with `status: ready`, `active`, or `blocked`, note pending
`todo`/`in_progress` tasks.
**Step 4 — Present brief**

View File

@@ -5,6 +5,12 @@ ID prefix: `{WP_PREFIX}`
Work items originate as files in this repo **before** being registered in the hub.
Canonical workplan/workstream frontmatter statuses are:
`proposed`, `ready`, `active`, `blocked`, `backlog`, `finished`, `archived`.
Use `proposed` for a newly drafted plan, `ready` after review against current
repo state, and `finished` when implementation is complete. `stalled` and
`needs_review` are derived health labels, not stored statuses.
Closed workplans may be moved to `workplans/archived/` with a completion-date
prefix: `YYMMDD-{REPO_SLUG}-WP-NNNN-<slug>.md`. The frontmatter id remains
unchanged; the prefix is only for quick visual reference.

View File

@@ -40,6 +40,16 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
from api.workplan_status import ( # noqa: E402
CANONICAL_WORKSTREAM_STATUSES,
SUPPORTED_WORKSTREAM_STATUSES,
normalize_workstream_status,
)
try:
import yaml as _yaml
_HAS_YAML = True
@@ -58,7 +68,8 @@ except ImportError:
# ---------------------------------------------------------------------------
REQUIRED_FRONTMATTER = {"id", "type", "title", "domain", "status", "owner", "created"}
VALID_WP_STATUSES = {"active", "completed", "archived"}
VALID_WP_STATUSES = set(CANONICAL_WORKSTREAM_STATUSES)
SUPPORTED_WP_STATUSES = set(SUPPORTED_WORKSTREAM_STATUSES)
VALID_TASK_STATUSES = {"todo", "in_progress", "blocked", "done", "cancelled"}
VALID_TASK_PRIORITIES = {"low", "medium", "high", "critical"}
@@ -198,11 +209,14 @@ def _check_workplan_file(wp_file: Path, report: Report) -> dict | None:
# status
status = str(meta.get("status", ""))
if status not in VALID_WP_STATUSES:
if status not in SUPPORTED_WP_STATUSES:
report.add(Level.FAIL, "frontmatter-status",
f"status must be one of {sorted(VALID_WP_STATUSES)}, got {status!r}", fname)
f"status must be one of {sorted(VALID_WP_STATUSES)} "
f"(legacy aliases accepted: {sorted(SUPPORTED_WP_STATUSES - VALID_WP_STATUSES)}), "
f"got {status!r}", fname)
else:
report.add(Level.PASS, "frontmatter-status", f"status={status}", fname)
report.add(Level.PASS, "frontmatter-status",
f"status={normalize_workstream_status(status)}", fname)
# id format
wp_id = str(meta.get("id", ""))
@@ -363,7 +377,7 @@ def check_api(api_base: str, metas: list[dict], domain_slug: str | None,
continue
for ws in workstreams:
ws_status = ws.get("status", "")
if ws_status in ("completed", "archived"):
if normalize_workstream_status(ws_status) in {"finished", "archived"}:
continue
ws_id = ws["id"]
ws_slug = ws.get("slug", "")