chore: close overview counts and review reliability workplans

2026-06-07 17:36:59 +02:00
parent b14844351c
commit 99a66765f3
6 changed files with 351 additions and 9 deletions
--- a/dashboard/src/components/workplan-status.js
+++ b/dashboard/src/components/workplan-status.js
@@ -31,8 +31,8 @@ export function isOpenWorkstream(status) {
  return OPEN_WORKSTREAM_STATUSES.includes(normalizeWorkstreamStatus(status));
 }

-export function isStalledWorkstream(w, staleDays = 7) {
-  const staleAt = new Date(Date.now() - staleDays * 24 * 60 * 60 * 1000);
+export function isStalledWorkstream(w) {
+  const staleAt = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
  const openTasks = (w.todo ?? 0) + (w.progress ?? 0) + (w.wait ?? 0);
  return ["active", "blocked"].includes(normalizeWorkstreamStatus(w.status))
    && new Date(w.updated_at) < staleAt
--- a/dashboard/src/index.md
+++ b/dashboard/src/index.md
@@ -225,7 +225,7 @@ function _workstreamsForMode(mode, rows) {
    return allRows.filter(w => normalizeWorkstreamStatus(w.status) === modeValue);
  }
  if (modeValue === "needs_review") return allRows.filter(needsReviewWorkstream);
-  if (modeValue === "stalled") return allRows.filter(isStalledWorkstream);
+  if (modeValue === "stalled") return allRows.filter(w => isStalledWorkstream(w));

  const since = _timeCutoff(modeValue);
  if (!since) return allRows.filter(w => normalizeWorkstreamStatus(w.status) === "active");
--- a/dashboard/test/workplan-status.test.mjs
+++ b/dashboard/test/workplan-status.test.mjs
@@ -0,0 +1,45 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import {isStalledWorkstream} from "../src/components/workplan-status.js";
+
+test("stalled workstream predicate is safe to pass to Array.filter", () => {
+  const realNow = Date.now;
+  Date.now = () => new Date("2026-06-07T15:00:00Z").getTime();
+
+  try {
+    const rows = [
+      {
+        title: "stale active",
+        status: "active",
+        updated_at: "2026-05-20T12:00:00Z",
+        done: 1,
+        progress: 1,
+        todo: 0,
+        wait: 0,
+      },
+      {
+        title: "fresh active",
+        status: "active",
+        updated_at: "2026-06-06T12:00:00Z",
+        done: 1,
+        progress: 1,
+        todo: 0,
+        wait: 0,
+      },
+      {
+        title: "stale finished",
+        status: "finished",
+        updated_at: "2026-05-20T12:00:00Z",
+        done: 2,
+        progress: 0,
+        todo: 0,
+        wait: 0,
+      },
+    ];
+
+    assert.deepEqual(rows.filter(isStalledWorkstream).map(w => w.title), ["stale active"]);
+  } finally {
+    Date.now = realNow;
+  }
+});
--- a/workplans/STATE-WP-0057-overview-workstream-stage-counts.md
+++ b/workplans/STATE-WP-0057-overview-workstream-stage-counts.md
@@ -4,7 +4,7 @@ type: workplan
 title: "Overview Workstream Stage Counts"
 domain: custodian
 repo: state-hub
-status: active
+status: finished
 owner: codex
 topic_slug: custodian
 created: "2026-06-07"
@@ -170,7 +170,7 @@ small and large count values.

 ```task
 id: STATE-WP-0057-T05
-status: progress
+status: done
 priority: medium
 state_hub_task_id: "5e673e25-407d-43f0-95d6-5a596afc5b3b"
 ```
@@ -196,7 +196,7 @@ no longer appears as an open suggestion after verified completion.

 ```task
 id: STATE-WP-0057-T06
-status: wait
+status: done
 priority: high
 state_hub_task_id: "8513290e-02a4-428d-8f1a-5de4fe447aa8"
 ```
@@ -253,6 +253,24 @@ accurate workstream counts for all mode groups.
  now call `_setChartMode`, and the Plot cell depends directly on
  `_chartModeState`. The running preview and built `dist/index.html` both show
  that dependency shape. `npm run test` and a clean `npm run build` both passed.
- Browser click-through remains pending because the Codex in-app browser bridge
-  failed to start in this session with a Windows sandbox setup failure, and no
-  local Playwright/Puppeteer package is installed for a headless fallback.
+- The earlier browser click-through gap was closed in the final pass below with
+  a temporary Playwright/Chromium fallback.
+- 2026-06-07 final closeout: Installed a temporary Playwright/Chromium fallback
+  under `/tmp/statehub-playwright` after the in-app browser bridge still failed.
+  Browser verification against the local preview on `http://127.0.0.1:3001/`
+  confirmed lifecycle, health, and recently-changed selector counts match chart
+  rows; desktop and mobile selector fit passed; polling refresh updated counts
+  without resetting the selected mode; stale/offline refresh kept last-known
+  counts; and a synthetic empty recent window rendered the expected empty state.
+- Final verification found and fixed a stalled-health count bug:
+  `_workstreamsForMode("stalled", rows)` was passing `isStalledWorkstream`
+  directly to `Array.filter`, so row indexes were being treated as the helper's
+  optional `staleDays` argument. The Overview page now wraps the helper call,
+  and `isStalledWorkstream` no longer accepts that positional override. Added a
+  regression test covering `rows.filter(isStalledWorkstream)`.
+- Final checks passed: `npm --prefix dashboard run test` passed 12 tests,
+  `make dashboard-check` built 61 pages and validated 49 links, and final
+  Playwright smoke verified `stalled (4)` with four rendered chart rows.
+- Suggestion `70e9bfd4-235d-4677-b053-39b78af8e5aa` now has the final review
+  evidence note and status `finished`. API verification shows UI Feedback
+  active suggestions = 0, closed suggestions = 8, and Todo open suggestions = 0.
--- a/workplans/STATE-WP-0059-mcp-write-layer-reliability.md
+++ b/workplans/STATE-WP-0059-mcp-write-layer-reliability.md
@@ -0,0 +1,145 @@
+---
+id: STATE-WP-0059
+type: workplan
+title: "State Hub MCP write-layer reliability"
+domain: custodian
+repo: state-hub
+status: ready
+owner: codex
+topic_slug: custodian
+created: "2026-06-07"
+updated: "2026-06-07"
+state_hub_workstream_id: "0c9f9ed3-235b-4291-bfa3-cc08699b02b4"
+---
+
+# State Hub MCP Write-Layer Reliability
+
+**Origin:** infrastructure-friction analysis from the `helix_forge` domain
+(`agentic-resources/docs/ASSESSMENT-infra-friction.md`).
+
+## Critical Review
+
+This workplan conforms with `INTENT.md`: State Hub owns the local-first
+coordination service and FastMCP tools that expose topics, workstreams, tasks,
+decisions, and progress. Fixing a flaky MCP write wrapper keeps the hub usable
+as an agent coordination surface without moving canon out of workplan files or
+turning State Hub into a task factory.
+
+Implementation should stay narrow:
+
+- Keep MCP as a stateless HTTP client over the FastAPI API; do not add direct DB
+  writes or duplicate API business logic in `mcp_server/server.py`.
+- Preserve the REST API as the source contract and make MCP responses/error
+  handling match it.
+- Do not fold the bulk-write work from `STATE-WP-0058` into this workplan; this
+  plan is about correctness and reliable error surfacing for existing writes.
+
+Repo review on 2026-06-07 found the likely fault surfaces:
+
+- `mcp_server/server.py` implements the affected write tools directly:
+  `create_workstream`, `create_task`, `update_task_status`, `record_decision`,
+  and `add_progress_event`.
+- `_get` / `_post` / `_patch` return `{"error": ...}` on HTTP/client failure.
+  Several write tools then assume successful response fields exist; for example
+  `update_task_status` formats `task['title']`, which can explain the observed
+  `'title'` KeyError when the underlying PATCH returned an error object.
+- Existing `tests/test_mcp_smoke.py` verifies REST endpoint shapes the MCP tools
+  depend on, but it explicitly does not exercise the MCP protocol/tool wrapper
+  itself. This gap is where the `-32602 Invalid request parameters` family can
+  hide.
+
+The State Hub MCP **write** tools fail while the **REST API stays healthy**
+(`/state/health` → 200). Observed continuously during a helix_forge work session:
+
+- `update_task_status` → `'title'` **KeyError**
+- `add_progress_event` / `create_workstream` / `create_task` → `-32602 Invalid
+  request parameters`
+
+Every hub write in that session had to fall back to REST `PATCH`/`POST`. The
+helix_forge error mining corroborates this independently: the error family recurs
+in **4 captured sessions across 3 repos**.
+
+This is a **reliability defect in the MCP wrapper layer** (the API and DB are
+fine), and it is dangerous: an agent with no REST fallback would silently fail to
+record status/progress, leaving file↔hub drift unreconciled.
+
+## Reproduce + Diagnose the MCP Write Failures
+
+```task
+id: STATE-WP-0059-T01
+status: todo
+priority: high
+state_hub_task_id: "7fc04b00-2493-4310-ae76-101660621da6"
+```
+
+Reproduce the `update_task_status` `'title'` KeyError and the `-32602` error on
+`add_progress_event` / `create_workstream` / `create_task` against the working REST
+contract. Capture the exact failing request payloads and the server-side stack
+trace and response. Classify each failure as response serialization (KeyError
+`'title'`) or request-parameter validation (`-32602`), and list the tools affected.
+
+Done when a focused failing test or diagnostic script exercises the real MCP
+tool wrapper, not only the FastAPI endpoint, and records:
+
+- the MCP tool name and argument payload,
+- the corresponding REST request/response,
+- whether failure happens before the HTTP call, during HTTP error handling, or
+  during MCP response serialization.
+
+## Fix the MCP Serialization / Param-Validation Layer
+
+```task
+id: STATE-WP-0059-T02
+status: todo
+priority: high
+state_hub_task_id: "2636e1d5-142e-463f-875e-4dc1edf12853"
+```
+
+Fix the MCP wrapper so the write tools succeed and return the same shape as their
+REST equivalents. Add an end-to-end regression test exercising each write tool
+(`create_workstream` / `create_task` / `update_task_status` / `add_progress_event`
+/ `record_decision`) so the MCP and REST paths stay in parity.
+
+Implementation notes:
+
+- Add a small shared MCP response helper rather than open-coding success/error
+  assumptions in each write tool.
+- If the API helper returns an error object, return a clear MCP-visible error
+  payload and do not emit follow-up progress events that pretend the write
+  succeeded.
+- Ensure optional arguments and structured values such as `detail` are declared
+  and normalized in a way FastMCP can validate consistently.
+- Keep existing automatic progress-event side effects for successful writes.
+- Update `tests/test_mcp_smoke.py` or add a sibling test that invokes the MCP
+  tool layer directly enough to catch `-32602` and response-field KeyErrors.
+
+## Verify + Harden Error Surfacing
+
+```task
+id: STATE-WP-0059-T03
+status: todo
+priority: medium
+state_hub_task_id: "af137d6b-07ca-4110-abb0-45a96b84b7a3"
+```
+
+Verify the fixed tools against the REST contract across a representative session
+flow. Ensure failures surface clearly (no silent success) so agents can fall back
+deterministically. Document the MCP↔REST parity guarantee in
+`mcp_server/TOOLS.md`, including the expected fallback behavior when the API is
+unreachable.
+
+Done when:
+
+- the MCP write regression tests pass,
+- a manual local flow can create a workstream, create a task, update task
+  status, record a decision, and add progress through MCP while REST
+  verification confirms matching records,
+- an induced API failure returns a clear error instead of a KeyError or false
+  success,
+- the session closes with `make fix-consistency REPO=state-hub`.
+
+After workplan updates, run from `~/state-hub`:
+
+```bash
+make fix-consistency REPO=state-hub
+```
--- a/workplans/STATE-WP-0060-fix-consistency-cross-flavor.md
+++ b/workplans/STATE-WP-0060-fix-consistency-cross-flavor.md
@@ -0,0 +1,134 @@
+---
+id: STATE-WP-0060
+type: workplan
+title: "Fix cross-flavor make fix-consistency failures"
+domain: custodian
+repo: state-hub
+status: ready
+owner: codex
+topic_slug: custodian
+created: "2026-06-07"
+updated: "2026-06-07"
+state_hub_workstream_id: "557ea19e-64bf-44d9-a288-e8ad692a3754"
+---
+
+# Fix Cross-Flavor `make fix-consistency` Failures
+
+**Origin:** infrastructure-friction analysis from the `helix_forge` domain
+(`agentic-resources/docs/ASSESSMENT-infra-friction.md`, error mining
+AGENTIC-WP-0006).
+
+## Critical Review
+
+This workplan conforms with `INTENT.md`: consistency tooling is one of State
+Hub's explicit responsibilities, and `fix-consistency` is the bridge that keeps
+files-first workplans synchronized into the live read model. Improving its
+caller-facing reliability is State Hub work.
+
+The original draft was too broad in one place. Current repo review on
+2026-06-07 shows the single-repo Make targets already convert warning-only exit
+code `2` to shell success:
+
+- `make check-consistency REPO=...` runs `scripts/consistency_check.py` and then
+  maps `e == 2` to `exit 0`.
+- `make fix-consistency REPO=...` does the same.
+- The underlying CLI still exits `2` for warnings, which is useful for strict
+  machine callers but can confuse agents or wrappers if they call the script
+  directly, use an aggregate target, or hit a repo-specific unrecoverable case.
+
+Implementation should therefore diagnose the exact caller path before changing
+semantics. Do not blindly make all warnings disappear: warnings must remain
+visible in text/JSON output and in the structured result. Do not relax genuine
+failures just to make automation green.
+
+`make fix-consistency` exits non-zero —
+`make: *** [...: fix-consistency] Error 1` — in **5 captured coding sessions
+across 4 repos**. It is the **only error category that spans both Claude and
+Grok**; every other recurring error in the corpus is Claude-only, which makes
+this the one cross-flavor signal we can trust as not agent-specific.
+
+`fix-consistency` is the **ADR-001 reconciliation mechanism** that syncs the
+files-first workplans with the hub read model. When it fails, workplan/hub drift
+goes unreconciled — so a flaky exit here undermines the whole coordination model.
+(Note: on healthy single-repo Make runs it can print `PASS (with warnings)` and
+still exit 0 because the Make target normalizes warning exit code `2`.)
+
+## Root-Cause the Non-Zero Exit Path
+
+```task
+id: STATE-WP-0060-T01
+status: todo
+priority: high
+state_hub_task_id: "011a49ad-13a5-46f7-849d-f7b1a0bca005"
+```
+
+Reproduce the failure across the repos/flavors that hit it. Identify the failure
+mode at the actual caller path — single-repo Make target, direct
+`scripts/consistency_check.py`, aggregate target, post-commit hook, remote path,
+or repo-specific reconciliation failure. Capture the exact command, output, and
+exit code.
+
+Done when the diagnosis classifies each captured failure into one of:
+
+- warning-only direct CLI exit `2` misinterpreted by an agent/wrapper,
+- aggregate/remote target exit handling that differs from the single-repo Make
+  target,
+- real unrecoverable reconciliation failure,
+- environment/setup issue such as missing `uv`, unreachable API, stale repo
+  path, or missing write permission.
+
+Also record whether the older `Makefile:227` line number simply refers to a
+previous file version; the current `fix-consistency` target lives later in the
+Makefile.
+
+## Fix Exit Semantics / Failing Reconciliation Case
+
+```task
+id: STATE-WP-0060-T02
+status: todo
+priority: high
+state_hub_task_id: "49388ab7-db45-4bdb-a89e-bb7f116afd47"
+```
+
+Fix the exact failing path without hiding drift.
+
+Implementation notes:
+
+- Preserve explicit warning reporting in CLI text and JSON output.
+- Keep direct script exit semantics if callers rely on `2` to distinguish
+  warnings, unless the diagnosis shows that contract is the root defect and the
+  repo intentionally changes it.
+- Ensure all agent/operator-facing Make targets, hooks, and documented commands
+  that say warnings are acceptable actually return shell success on warnings-only
+  runs.
+- If a real reconciliation bug is found, fix the underlying case rather than
+  weakening the exit code.
+- Add tests for the boundary: clean = success, warnings-only = agent-facing
+  success with visible warnings, real failures = non-zero.
+
+## Verify Across Affected Repos + Document
+
+```task
+id: STATE-WP-0060-T03
+status: todo
+priority: medium
+state_hub_task_id: "c9939dcb-37da-4073-a5f6-06f94fc7807e"
+```
+
+Verify `fix-consistency` now passes on the repos that previously failed
+(cross-flavor). Document the exit-code contract for callers (agents + operators)
+near the Make targets or consistency docs, including when to use direct CLI
+strictness versus Make wrappers.
+
+Done when:
+
+- the affected repos/flavors no longer report warning-only runs as failed,
+- a genuine failure fixture still exits non-zero,
+- `make fix-consistency REPO=state-hub` succeeds after this repo's workplan
+  updates.
+
+After workplan updates, run from `~/state-hub`:
+
+```bash
+make fix-consistency REPO=state-hub
+```