From c0b4b984b0210d44607f0f97aeaf61cf078ff662 Mon Sep 17 00:00:00 2001 From: Bernd Worsch Date: Sun, 29 Mar 2026 21:38:57 +0000 Subject: [PATCH] chore: register Phase 7 workplan (IHUB-WP-0007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 7 tasks: T01 schema (FrictionScore, BottleneckRecord, HubHealthSnapshot, CrossHubPropagation) → T02 widget pain heatmap → T03 workflow bottleneck analysis → T04 hub health correlation → T05 cross-hub propagation → T06 operational review board → T07 gate. Co-Authored-By: Claude Sonnet 4.6 --- ...servability-and-operational-integration.md | 383 ++++++++++++++++++ 1 file changed, 383 insertions(+) create mode 100644 workplans/IHUB-WP-0007-ihf-phase7-advanced-observability-and-operational-integration.md diff --git a/workplans/IHUB-WP-0007-ihf-phase7-advanced-observability-and-operational-integration.md b/workplans/IHUB-WP-0007-ihf-phase7-advanced-observability-and-operational-integration.md new file mode 100644 index 0000000..29ed5d0 --- /dev/null +++ b/workplans/IHUB-WP-0007-ihf-phase7-advanced-observability-and-operational-integration.md @@ -0,0 +1,383 @@ +--- +id: IHUB-WP-0007 +type: workplan +title: "IHF Phase 7 — Advanced Observability and Operational Integration" +domain: inter_hub +repo: inter-hub +status: todo +owner: custodian +topic_slug: inter_hub +created: "2026-03-29" +updated: "2026-03-29" +state_hub_workstream_id: "541e06b3-c7ff-4a27-9259-338e04b2aa2f" +--- + +# IHF Phase 7 — Advanced Observability and Operational Integration + +## Goal + +Integrate interaction governance with broader operational intelligence. Phase 6 +established cross-framework widget participation. Phase 7 turns the accumulated +interaction data into operational intelligence: friction heatmaps, pipeline +bottleneck detection, per-hub health scores, and cross-hub pattern propagation. +The capstone is an Operational Review Board dashboard that gives hub leaders a +unified view across all hubs. 
+ +## Background + +Phases 1–6 are complete. The IHF core (widget registry, interaction events, +annotations, requirements, decisions, outcomes, agent assistance, +cross-framework adapters) is stable and extensible. + +The spec (§Phase 7) calls for: +- Hub health correlation +- Policy violation correlation +- Workflow bottleneck analysis +- Interaction pain heatmaps +- Queue and job linkage +- Cross-hub issue propagation analysis + +Artifacts introduced: `FrictionScore`, `BottleneckRecord`, `HubHealthSnapshot`, +`CrossHubPropagation`. + +Reference: `specs/InteractionHubFrameworkSpecification_v0.1.md` §Phase 7, +`docs/phase6-summary.md`, `docs/ihp-controllers-views-forms.md`. + +## Phase 7 Exit Criteria (from IHF spec §Phase 7) + +- Interaction data informs operational decision-making +- Hub leaders can inspect systemic friction patterns +- The platform supports cross-domain learning + +## Data Artifacts Introduced (Phase 7) + +`FrictionScore`, `BottleneckRecord`, `HubHealthSnapshot`, `CrossHubPropagation` + +--- + +## Tasks + +### T01 — Schema: FrictionScore, BottleneckRecord, HubHealthSnapshot, CrossHubPropagation + +```task +id: IHUB-WP-0007-T01 +status: todo +priority: high +state_hub_task_id: "86e31f8b-62a3-4176-9b10-2fe7a8dbcc23" +``` + +Add Phase 7 tables to `Application/Schema.sql` and write migration: + +```sql +-- Aggregated pain score per widget, recomputed on demand or scheduled. 
+CREATE TABLE friction_scores (
+    id                    UUID DEFAULT uuid_generate_v4() PRIMARY KEY NOT NULL,
+    widget_id             UUID NOT NULL REFERENCES widgets(id),
+    score                 INTEGER NOT NULL DEFAULT 0,
+    -- 0–100; higher = more friction
+    annotation_count      INTEGER NOT NULL DEFAULT 0,
+    error_event_count     INTEGER NOT NULL DEFAULT 0,
+    regression_flag       BOOLEAN NOT NULL DEFAULT FALSE,
+    stale_candidate_count INTEGER NOT NULL DEFAULT 0,
+    last_computed_at      TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+    UNIQUE (widget_id),  -- its backing index also serves widget_id lookups
+    CONSTRAINT friction_scores_score_check CHECK (score BETWEEN 0 AND 100)
+);
+
+CREATE INDEX friction_scores_score_idx ON friction_scores (score DESC);
+
+-- Detected stalls at specific pipeline stages.
+CREATE TABLE bottleneck_records (
+    id            UUID DEFAULT uuid_generate_v4() PRIMARY KEY NOT NULL,
+    hub_id        UUID NOT NULL REFERENCES hubs(id),
+    stage         TEXT NOT NULL,
+    subject_type  TEXT NOT NULL,
+    -- 'RequirementCandidate' | 'Requirement' | 'DecisionRecord' | 'DeploymentRecord'
+    subject_id    UUID NOT NULL,
+    stalled_since TIMESTAMP WITH TIME ZONE NOT NULL,
+    severity      TEXT NOT NULL DEFAULT 'medium',
+    resolved_at   TIMESTAMP WITH TIME ZONE,
+    notes         TEXT,
+    created_at    TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+    CONSTRAINT bottleneck_records_stage_check CHECK (stage IN ('candidate', 'requirement', 'decision', 'observation')),
+    CONSTRAINT bottleneck_records_severity_check CHECK (severity IN ('low', 'medium', 'high', 'critical'))
+);
+
+CREATE INDEX bottleneck_records_hub_id_idx ON bottleneck_records (hub_id);
+CREATE INDEX bottleneck_records_stage_idx ON bottleneck_records (stage);
+-- At most one unresolved bottleneck per subject; keeps re-detection idempotent and indexes the unresolved set.
+CREATE UNIQUE INDEX bottleneck_records_unresolved_subject_idx ON bottleneck_records (subject_type, subject_id) WHERE resolved_at IS NULL;
+
+-- Periodic health snapshots for trend tracking.
+CREATE TABLE hub_health_snapshots (
+    id                 UUID DEFAULT uuid_generate_v4() PRIMARY KEY NOT NULL,
+    hub_id             UUID NOT NULL REFERENCES hubs(id),
+    health_score       INTEGER NOT NULL,
+    open_candidates    INTEGER NOT NULL DEFAULT 0,
+    regressed_widgets  INTEGER NOT NULL DEFAULT 0,
+    stale_decisions    INTEGER NOT NULL DEFAULT 0,
+    active_bottlenecks INTEGER NOT NULL DEFAULT 0,
+    computed_at        TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+    CONSTRAINT hub_health_snapshots_health_score_check CHECK (health_score BETWEEN 0 AND 100)
+);
+
+-- Serves "latest snapshot per hub" and plain hub_id lookups (leftmost prefix), so no hub_id-only index.
+CREATE INDEX hub_health_snapshots_computed_at_idx
+    ON hub_health_snapshots (hub_id, computed_at DESC);
+
+-- Patterns detected across multiple hubs.
+CREATE TABLE cross_hub_propagations (
+    id               UUID DEFAULT uuid_generate_v4() PRIMARY KEY NOT NULL,
+    pattern_type     TEXT NOT NULL,
+    -- 'annotation_cluster' | 'widget_type_friction'
+    source_hub_id    UUID REFERENCES hubs(id),
+    affected_hub_ids JSONB NOT NULL DEFAULT '[]',
+    -- array of hub UUIDs
+    summary          TEXT NOT NULL,
+    detected_at      TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
+    status           TEXT NOT NULL DEFAULT 'open',
+    notes            TEXT,
+    CONSTRAINT cross_hub_propagations_status_check CHECK (status IN ('open', 'acknowledged', 'resolved'))
+);
+
+CREATE INDEX cross_hub_propagations_status_idx ON cross_hub_propagations (status);
+CREATE INDEX cross_hub_propagations_pattern_idx ON cross_hub_propagations (pattern_type);
+```
+
+**Exit criteria:** `migrate` runs cleanly; all Phase 7 types available in GHCi.
+
+---
+
+### T02 — Widget Pain Heatmap: friction scoring and per-hub heatmap view
+
+```task
+id: IHUB-WP-0007-T02
+status: todo
+priority: high
+state_hub_task_id: "3a5ecd28-17c2-4258-bfd9-b3eaecf52135"
+```
+
+1. 
Add `Application/Helper/FrictionScore.hs` with `computeFrictionScore`: + - `annotation_count` — total annotations for widget + - `error_event_count` — events with `event_type = 'errored'` + - `regression_flag` — `True` if widget appears in `regressedWidgetIds` + - `stale_candidate_count` — open candidates older than 30 days + - Score formula (documented in module header): + ``` + score = min 100 $ + annotationCount * 5 + + errorEventCount * 10 + + (if regressionFlag then 20 else 0) + + staleCandidateCount * 8 + ``` + - Upserts into `friction_scores` atomically (`INSERT ... ON CONFLICT (widget_id) DO UPDATE`) +2. Add `RecomputeFrictionAction { hubId }` to `HubsController`: + - Recomputes scores for all widgets in the hub + - Redirects back to heatmap view +3. Add `FrictionHeatmapAction { hubId }` view: + - Grid of widget cards, colour-coded by score band: + - 0–19: green (`bg-green-100`) + - 20–39: yellow (`bg-yellow-100`) + - 40–59: amber (`bg-orange-100`) + - 60+: red (`bg-red-100`) + - Each card: widget name, score, link to widget show + - "Recompute" button triggers `RecomputeFrictionAction` +4. Link from hub Show page as "Friction Heatmap" + +**Exit criteria:** Scores compute correctly for test fixtures; heatmap renders +with correct colour bands; recompute updates scores. + +--- + +### T03 — Workflow Bottleneck Analysis: stall detection across the pipeline + +```task +id: IHUB-WP-0007-T03 +status: todo +priority: high +state_hub_task_id: "ada0347a-880b-454e-843f-4a9135ea8739" +``` + +1. 
Add `Application/Helper/BottleneckDetector.hs` with `detectBottlenecks`: + - Stage 1 — `candidate`: `RequirementCandidate` with `status='open'` and + `created_at < now() - interval '30 days'` + - Stage 2 — `requirement`: `Requirement` with no linked `DecisionRecord` and + `created_at < now() - interval '60 days'` + - Stage 3 — `decision`: `DecisionRecord` with no linked `DeploymentRecord` + and `decided_at < now() - interval '30 days'` + - Stage 4 — `observation`: `DeploymentRecord` with no linked `OutcomeSignal` + and `deployed_at < now() - interval '14 days'` + - Severity: `critical` if age > 2× threshold, else `high` if > 1.5×, else `medium` + - Inserts a `BottleneckRecord` unless an unresolved one already exists for the same subject +2. Add `DetectBottlenecksAction { hubId }` — runs detector, redirects to dashboard +3. Add `BottleneckDashboardAction { hubId }` view: + - Table grouped by pipeline stage + - Columns: subject (linked), stalled since, age, severity badge + - "Resolve" button → `ResolveBottleneckAction { bottleneckRecordId }` + - "Detect" button triggers fresh detection +4. Link from hub Show page as "Bottlenecks" + +**Exit criteria:** Stale candidates create bottleneck records; dashboard renders +and groups correctly; resolve marks `resolved_at`. + +--- + +### T04 — Hub Health Correlation: composite health score and history + +```task +id: IHUB-WP-0007-T04 +status: todo +priority: high +state_hub_task_id: "b0c932c5-fdb7-47b6-adc7-b4f8ed5555e6" +``` + +1. Add `Application/Helper/HubHealth.hs` with `computeHubHealth`: + - Deductions from a base score of 100 (documented in module): + ``` + -5 per open RequirementCandidate + -10 per regressed widget + -8 per stale DecisionRecord (decided > 30 days, no deployment) + -12 per active critical BottleneckRecord + -6 per active high BottleneckRecord + floor at 0 + ``` + - Inserts new `HubHealthSnapshot` (never updates — history is append-only) +2. Add `SnapshotHubHealthAction { hubId }` — computes and redirects to history +3. 
Add `HubHealthHistoryAction { hubId }` view: + - Table of snapshots: timestamp, score (colour-coded), component breakdown + - Latest score shown prominently at top +4. Show health score badge on hub Show page (next to dashboard links): + - Fetch latest snapshot; display colour-coded score pill + - If no snapshot: "–" with link to take first snapshot + +**Exit criteria:** Snapshot computes correct score against test fixtures; history +table renders in order; badge appears on hub Show page. + +--- + +### T05 — Cross-Hub Propagation Analysis: pattern detection across hubs + +```task +id: IHUB-WP-0007-T05 +status: todo +priority: medium +state_hub_task_id: "7a860b9f-a835-47d6-96d8-2964ae37b12d" +``` + +1. Add `Application/Helper/CrossHubPropagation.hs` with `detectPropagations`: + - **Annotation cluster heuristic**: for each annotation `category`, count + distinct hubs with ≥3 annotations in that category in the last 14 days. + If ≥2 hubs qualify, emit a `CrossHubPropagation` with + `pattern_type='annotation_cluster'` and a generated summary. + - **Widget type friction heuristic**: for each `widget_type`, count hubs + where the max `FrictionScore` for that type is ≥40. If ≥2 hubs qualify, + emit `pattern_type='widget_type_friction'`. + - Skip if a matching open/acknowledged propagation already exists + (idempotent detection) +2. Add `DetectPropagationsAction` (global, no hubId) — runs detector +3. Add `CrossHubPropagationsAction` view (global): + - Table: pattern type, source hub, affected hubs (comma list), summary, + detected at, status badge + - "Acknowledge" and "Resolve" actions +4. Link from global nav (alongside "Adapters", "Ops Review") + +**Exit criteria:** Detection creates propagation records for qualifying patterns; +duplicate runs are idempotent; acknowledge/resolve transitions work. 
+ +--- + +### T06 — Operational Review Board Dashboard: cross-hub unified view + +```task +id: IHUB-WP-0007-T06 +status: todo +priority: medium +state_hub_task_id: "ffabc4d1-c166-4b7d-8bec-55365cbe0666" +``` + +1. Add `OperationalReviewBoardAction` to a new `OperationsController` + (or `HubsController` as a global action — no `hubId` parameter): + - **Panel 1 — Hub health matrix**: all hubs, latest health score (or "–"), + colour-coded row, link to hub and to health history + - **Panel 2 — Top friction widgets**: top 10 across all hubs by + `FrictionScore.score DESC`; columns: widget name, hub, score band, link + - **Panel 3 — Active bottlenecks by stage**: count of unresolved bottlenecks + per stage across all hubs; click-through to hub bottleneck dashboard + - **Panel 4 — Open cross-hub propagations**: list of open/acknowledged + propagation events with pattern type and affected hub count +2. Wrap the action in `autoRefresh` so the board live-updates on data change +3. Link from global nav as "Ops Review" +4. Show a cross-hub propagation count badge in the global nav when the count is > 0 + +**Exit criteria:** Dashboard renders all four panels; health matrix shows all +hubs; top friction list is correctly sorted; live-updates on data change. + +--- + +### T07 — Phase 7 gate: tests, consistency, docs + +```task +id: IHUB-WP-0007-T07 +status: todo +priority: high +state_hub_task_id: "a14b94f8-3b27-4f0c-9949-60fb65a57a05" +``` + +1. **Integration tests** (`Test/`): + - `FrictionScore` compute formula: widget with known annotation count → + expected score + - `BottleneckRecord` create + resolve: stale candidate → bottleneck detected; + resolve sets `resolved_at` + - `HubHealthSnapshot` compute: hub with known candidates/regressions → expected + score; history fetch returns in order + - `CrossHubPropagation` create + acknowledge + resolve + - `OperationalReviewBoard` action: compiles, fetches all hubs, returns counts +2. **Consistency sync** via State Hub MCP: + `check_repo_consistency(repo_slug="inter-hub", fix=True)` +3. 
**Documentation updates:** + - Update `SCOPE.md` current state section: Phase 7 complete + - Write `docs/phase7-summary.md`: what was built, scoring formulae, bottleneck + thresholds, cross-hub heuristics, known limitations, Phase 8 readiness +4. **Smoke test checklist:** + - Create two hubs with widgets and annotations; run friction recompute; verify + heatmap colours + - Age a candidate by force-setting `created_at`; run detect bottlenecks; + verify record appears + - Snapshot health for both hubs; verify Ops Review Board health matrix + - Trigger cross-hub propagation detection; verify propagation record + - Open Ops Review Board; confirm all four panels populate + +**Exit criteria:** All tests pass; consistency sync reports no errors; smoke +test completed; SCOPE.md updated. + +--- + +## Phase 7 Dependencies + +- Phases 1–6 schema stable (widget registry, interaction events, annotations, + requirements, decisions, outcomes, agent proposals, adapter specs) +- `friction_scores` requires widgets (T01 before T02) +- `bottleneck_records` requires hubs, candidates, requirements, decisions, + deployments (T01 before T03) +- `hub_health_snapshots` requires hubs and reads from bottleneck_records + (T03 before T04) +- `cross_hub_propagations` requires hub friction scores (T02 before T05) +- Operational Review Board aggregates all Phase 7 data (T02–T05 before T06) +- All feature tasks (T01–T06) before gate (T07) + +## Notes + +- **Friction scores are recomputed, not append-only.** Each widget has at most + one `FrictionScore` row (unique constraint on `widget_id`). Historical trend + is not tracked at the friction level — use `HubHealthSnapshot` for trends. +- **Bottleneck detection is idempotent.** Re-running the detector skips records + where an unresolved bottleneck already exists for the same subject. +- **Health snapshots are append-only.** Every `SnapshotHubHealthAction` call + inserts a new row. This preserves the health history for trend analysis. 
+- **Cross-hub detection requires FrictionScores to be current.** Run + `RecomputeFrictionAction` for all hubs before `DetectPropagationsAction`. +- **No scheduled jobs in Phase 7.** Detection and recomputation are triggered + manually via UI or curl. Phase 8 can layer on a cron/job system. +- **Severity thresholds and score weights are constants in the helper modules.** + They are intentionally not stored in the DB to avoid config drift — change + them in code and recompute.