diff --git a/docs/PRD-helix-forge.md b/docs/PRD-helix-forge.md index 55c6bb0..7105f8b 100644 --- a/docs/PRD-helix-forge.md +++ b/docs/PRD-helix-forge.md @@ -255,12 +255,26 @@ record: three flavors? - **OQ3** Where does detection logic run — local batch jobs, hub-side, or a dedicated service? What volume do we actually expect? -- **OQ4** Pattern format: how do we keep one agnostic representation while giving each - distributor enough to render high-quality native artifacts? -- **OQ5** What's the minimum trustworthy evidence bar before a pattern is allowed to be - distributed to live agent environments? -- **OQ6** How do we prevent pattern bloat — too many low-value instructions degrading - agent context budgets (cf. the token-budget policy in global instructions)? +- ~~**OQ4** Pattern format: how do we keep one agnostic representation while giving each + distributor enough to render high-quality native artifacts?~~ **Resolved (Phase 2, + AGENTIC-WP-0004):** the `SolutionPattern` core is flavor-agnostic (problem, + resolutions, scope, provenance) and carries per-flavor knowledge only in a separate + `rendering_hints` sub-structure keyed by flavor — distributors read the hints, the + core stays neutral. Catalogued as versioned files-first artifacts (FR-U3). +- ~~**OQ5** What's the minimum trustworthy evidence bar before a pattern is allowed to be + distributed to live agent environments?~~ **Resolved (Phase 2):** a two-tier + evidence bar (`[curate.gate]`). A *promote* floor (frequency / distinct sessions / + cost-impact) admits a candidate as `provisional`; a stricter *distribution* floor + (higher frequency, optional cross-flavor requirement, cost-impact) is required to + mark a pattern `approved` + `distribution_ready`. Defaults are conservative and + config-tunable. +- ~~**OQ6** How do we prevent pattern bloat — too many low-value instructions degrading + agent context budgets (cf. 
the token-budget policy in global instructions)?~~ + **Resolved (Phase 2):** a bloat guard flags duplicate (same id) and near-duplicate + (same signal-type+locus) candidates at review time, and the catalog dedups + structurally on the source-candidate key so re-promotion never multiplies entries. + Thin candidates stay `provisional` (not distributed) rather than padding live + context. ## 13. Risks diff --git a/session_memory/catalog/sp-problem-abandoned-outcome.json b/session_memory/catalog/sp-problem-abandoned-outcome.json new file mode 100644 index 0000000..9a6a651 --- /dev/null +++ b/session_memory/catalog/sp-problem-abandoned-outcome.json @@ -0,0 +1,79 @@ +{ + "created_at": "2026-06-07T08:02:03Z", + "distribution_ready": true, + "id": "sp-problem-abandoned-outcome", + "name": "cross-flavor problem: abandoned", + "polarity": "problem", + "problem": "cross-flavor problem: abandoned", + "provenance": { + "detected_at": null, + "evidence": { + "cost_impact": 13.0, + "cross_flavor": true, + "flavors": [ + "claude", + "grok" + ], + "frequency": 13, + "key": "problem:abandoned:outcome", + "locus": "outcome", + "polarity": "problem", + "repos": [ + "can-you-assist", + "llm-connect" + ], + "score": 253.5, + "sessions": [ + "claude:0510d5f4-956d-430a-9e89-6abc54f95b6a", + "claude:106fd234-949e-470d-a208-fe5ed8f14562", + "claude:377aba4f-8bbf-4760-90e9-469486ab0518", + "claude:4c606c31-beff-4a41-a325-ef63c9f8fb0e", + "claude:5bffe081-39fb-44cd-9966-4006f9235a0e", + "claude:60d3c947-eacf-49e9-b12c-ff8eb6b1c20b", + "claude:8f50f5b4-fbc4-4abe-9a7c-b25b2a713671", + "claude:95b1fe00-5d2e-482f-9618-fddf9cdbeb51", + "claude:c3e782ad-96b9-4cf1-9eb5-defdf3578426", + "claude:d75b2084-faec-40cf-aaf8-d7e0c026bde6", + "claude:f282058a-0a43-4fb8-87fc-1e67eaa3533c", + "grok:019e6103-af11-7a92-8e0b-5f40465d8223", + "grok:019e611e-0728-77d3-bb7a-8c5983e5058a" + ], + "signal_type": "abandoned", + "title": "cross-flavor problem: abandoned" + }, + "promoted_at": "2026-06-07T08:02:03Z", + 
"source_key": "problem:abandoned:outcome" + }, + "rendering_hints": { + "claude": { + "note": "TODO: refine rendering", + "target": "CLAUDE.md" + }, + "grok": { + "note": "TODO: refine rendering", + "target": "instructions" + } + }, + "resolutions": [ + { + "detail": "", + "steps": [], + "summary": "TODO: capture the recommended resolution" + } + ], + "schema_version": 1, + "scope": { + "domains": [], + "flavors": [ + "claude", + "grok" + ], + "repos": [ + "can-you-assist", + "llm-connect" + ] + }, + "status": "approved", + "updated_at": "2026-06-07T08:02:03Z", + "version": "1.0.0" +} diff --git a/session_memory/catalog/sp-problem-budget_overrun-tokens.json b/session_memory/catalog/sp-problem-budget_overrun-tokens.json new file mode 100644 index 0000000..a451e42 --- /dev/null +++ b/session_memory/catalog/sp-problem-budget_overrun-tokens.json @@ -0,0 +1,78 @@ +{ + "created_at": "2026-06-07T08:02:03Z", + "distribution_ready": true, + "id": "sp-problem-budget_overrun-tokens", + "name": "problem: budget overrun", + "polarity": "problem", + "problem": "problem: budget overrun", + "provenance": { + "detected_at": null, + "evidence": { + "cost_impact": 27.135, + "cross_flavor": false, + "flavors": [ + "claude" + ], + "frequency": 8, + "key": "problem:budget_overrun:tokens", + "locus": "tokens", + "polarity": "problem", + "repos": [ + "activity-core", + "artifact-store", + "citation-evidence", + "flex-auth", + "infospace-bench", + "railiance-apps", + "vergabe-teilnahme" + ], + "score": 217.08, + "sessions": [ + "claude:0ef1b45c-5c27-4e20-88b3-37daeaa24eca", + "claude:2c0d14e1-d089-4076-bf35-b134737a261d", + "claude:6e0d3d68-872b-4d93-bb09-0691e091314b", + "claude:8313f946-f008-4e98-9915-31950380e39e", + "claude:8fabd5ce-6a20-4412-9a8b-0f0763394a78", + "claude:a7b4a9b3-0942-4899-b502-e76b0013fc42", + "claude:b4ae9631-a7eb-42a6-acb1-c65b660c4b74", + "claude:bbcf1c2b-14be-40e4-826b-4b2b49b9d212" + ], + "signal_type": "budget_overrun", + "title": "problem: budget overrun" + }, 
+ "promoted_at": "2026-06-07T08:02:03Z", + "source_key": "problem:budget_overrun:tokens" + }, + "rendering_hints": { + "claude": { + "note": "TODO: refine rendering", + "target": "CLAUDE.md" + } + }, + "resolutions": [ + { + "detail": "", + "steps": [], + "summary": "TODO: capture the recommended resolution" + } + ], + "schema_version": 1, + "scope": { + "domains": [], + "flavors": [ + "claude" + ], + "repos": [ + "activity-core", + "artifact-store", + "citation-evidence", + "flex-auth", + "infospace-bench", + "railiance-apps", + "vergabe-teilnahme" + ] + }, + "status": "approved", + "updated_at": "2026-06-07T08:02:03Z", + "version": "1.0.0" +} diff --git a/session_memory/catalog/sp-success-clean_pass-outcome.json b/session_memory/catalog/sp-success-clean_pass-outcome.json new file mode 100644 index 0000000..6501853 --- /dev/null +++ b/session_memory/catalog/sp-success-clean_pass-outcome.json @@ -0,0 +1,106 @@ +{ + "created_at": "2026-06-07T08:02:03Z", + "distribution_ready": true, + "id": "sp-success-clean_pass-outcome", + "name": "cross-flavor success: clean pass", + "polarity": "success", + "problem": "cross-flavor success: clean pass", + "provenance": { + "detected_at": null, + "evidence": { + "cost_impact": 20.0, + "cross_flavor": true, + "flavors": [ + "claude", + "grok" + ], + "frequency": 20, + "key": "success:clean_pass:outcome", + "locus": "outcome", + "polarity": "success", + "repos": [ + "activity-core", + "agentic-resources", + "artifact-store", + "can-you-assist", + "citation-evidence", + "infospace-bench", + "issue-facade", + "ops-bridge", + "railiance-apps", + "state-hub", + "the-custodian", + "vergabe-teilnahme" + ], + "score": 600.0, + "sessions": [ + "claude:0ef1b45c-5c27-4e20-88b3-37daeaa24eca", + "claude:16bdbec4-b018-4902-9fb5-336f8f3d61c8", + "claude:2c0d14e1-d089-4076-bf35-b134737a261d", + "claude:30dbad62-c042-41f2-80c1-5953a1100e7f", + "claude:39dd33b1-d156-4d6a-8c33-c359b6f841d8", + "claude:4307eff6-cd39-4189-be58-79a3acb69d6c", + 
"claude:4340b160-2fb6-47d0-897c-3cac0a8855d8", + "claude:631de76e-fdee-43b5-b091-7b7675467ad1", + "claude:63fd4df2-5add-4748-af21-c1544825e006", + "claude:6e0d3d68-872b-4d93-bb09-0691e091314b", + "claude:8313f946-f008-4e98-9915-31950380e39e", + "claude:8fabd5ce-6a20-4412-9a8b-0f0763394a78", + "claude:99e9c5af-043f-4b97-8d92-14189da8716b", + "claude:a7b4a9b3-0942-4899-b502-e76b0013fc42", + "claude:a9483f07-c9dc-4f71-9fa0-831790ea965e", + "claude:b4ae9631-a7eb-42a6-acb1-c65b660c4b74", + "claude:eb837dd1-5b8e-472e-b9e1-4537b10e03e6", + "claude:ee9e84f2-bc35-4eb5-a7ad-aaec5f31d965", + "claude:f1b25697-0e5f-45f0-81d1-af0f1762c438", + "grok:019e6122-00c0-79f3-b4e5-9c70b77c015d" + ], + "signal_type": "clean_pass", + "title": "cross-flavor success: clean pass" + }, + "promoted_at": "2026-06-07T08:02:03Z", + "source_key": "success:clean_pass:outcome" + }, + "rendering_hints": { + "claude": { + "note": "TODO: refine rendering", + "target": "CLAUDE.md" + }, + "grok": { + "note": "TODO: refine rendering", + "target": "instructions" + } + }, + "resolutions": [ + { + "detail": "", + "steps": [], + "summary": "TODO: capture the recommended resolution" + } + ], + "schema_version": 1, + "scope": { + "domains": [], + "flavors": [ + "claude", + "grok" + ], + "repos": [ + "activity-core", + "agentic-resources", + "artifact-store", + "can-you-assist", + "citation-evidence", + "infospace-bench", + "issue-facade", + "ops-bridge", + "railiance-apps", + "state-hub", + "the-custodian", + "vergabe-teilnahme" + ] + }, + "status": "approved", + "updated_at": "2026-06-07T08:02:03Z", + "version": "1.0.0" +} diff --git a/workplans/AGENTIC-WP-0004-session-memory-phase2.md b/workplans/AGENTIC-WP-0004-session-memory-phase2.md index 9b9c507..7fed9df 100644 --- a/workplans/AGENTIC-WP-0004-session-memory-phase2.md +++ b/workplans/AGENTIC-WP-0004-session-memory-phase2.md @@ -4,11 +4,11 @@ type: workplan title: "Coding Session Memory — Phase 2 (Curate: review workflow + Pattern Catalog)" domain: 
helix_forge repo: agentic-resources -status: ready +status: finished owner: codex topic_slug: helix-forge created: "2026-06-06" -updated: "2026-06-06" +updated: "2026-06-07" state_hub_workstream_id: "b3703684-f60e-42f3-b03e-dabe3e8ce3f4" --- @@ -146,7 +146,7 @@ detect → curate → (Phase 3) distribute flow. ```task id: AGENTIC-WP-0004-T07 -status: todo +status: done priority: medium state_hub_task_id: "20407007-0a8b-4999-a470-fa3c84e17eba" ``` @@ -156,10 +156,22 @@ Unit tests for schema/catalog/review/gating on synthetic candidates, plus an the live detect output (the Claude+Grok "clean pass" / "abandoned" patterns from the WP-0003 verification) into the catalog and confirms a hub decision is logged (or queued if the API is down). Confirm catalog round-trips and versioning is -idempotent on re-run. Refresh design open questions **OQ4/OQ5/OQ6** in -[DESIGN-session-memory.md](../docs/DESIGN-session-memory.md). After workplan file -updates, notify the custodian operator to run from `~/state-hub`: +idempotent on re-run. Refresh design open questions **OQ4/OQ5/OQ6** (PRD §12). +After workplan file updates, notify the custodian operator to run from +`~/state-hub`: ```bash make fix-consistency REPO=agentic-resources ``` + +**Verification results (2026-06-07):** full suite 72/72 green (26 new curate +tests across schema/catalog/review/gating/decisions/entrypoint). Live pipeline +over real local sessions: fresh ingest 94→93 → 72 digests; detect surfaced 3 +candidates, **2 cross-flavor** (Claude+Grok). `curate --auto-approve` promoted +all 3 into the files-first catalog — `sp-success-clean_pass-outcome` and +`sp-problem-abandoned-outcome` (both cross-flavor, `approved`/`distribution_ready`) +plus `sp-problem-budget_overrun-tokens` (Claude-only). 3 hub decisions queued +(API offline). Re-run was fully idempotent (3 skipped, 0 catalog writes, no +version bump). PRD §12 OQ4/OQ5/OQ6 resolved. 
The 3 catalog artifacts are +committed as the source of truth; the operator runs `make fix-consistency REPO=agentic-resources` (from `~/state-hub`) to index +them in the hub.