From dc699be97627975ce9a2250fc394107796baf4ed Mon Sep 17 00:00:00 2001 From: tegwick Date: Thu, 2 Jul 2026 23:40:45 +0200 Subject: [PATCH] Implement PMEM-WP-0016 ops-warden cross-runtime memory contracts. Add ops-warden coordination profile, session event schemas, activation helpers, adapter pack, evaluation scenarios, and contract documentation for shared memory across worker, agent session, and operator CLI surfaces. --- .INTENT.md.swp | Bin 0 -> 16384 bytes docs/ops-warden-memory-contract.md | 75 +++ src/phase_memory/__init__.py | 46 ++ src/phase_memory/ops_warden.py | 497 ++++++++++++++++++ .../ops-warden-coordination-profile.json | 46 ++ .../ops-warden-evaluation-scenarios.json | 31 ++ tests/fixtures/public-api-snapshot.json | 22 + tests/test_ops_warden_memory.py | 150 ++++++ ...WP-0016-ops-warden-cross-runtime-memory.md | 32 +- 9 files changed, 891 insertions(+), 8 deletions(-) create mode 100644 .INTENT.md.swp create mode 100644 docs/ops-warden-memory-contract.md create mode 100644 src/phase_memory/ops_warden.py create mode 100644 tests/fixtures/ops-warden-coordination-profile.json create mode 100644 tests/fixtures/ops-warden-evaluation-scenarios.json create mode 100644 tests/test_ops_warden_memory.py diff --git a/.INTENT.md.swp b/.INTENT.md.swp new file mode 100644 index 0000000000000000000000000000000000000000..cecbcbb570f0ea8bbc2fe83b77ab0e4283bcfa63 GIT binary patch literal 16384 zcmeI3TdX8S8OK`?F9?c2@Io*pt7|;YoLy8jppmSsk`;Dcmc=KHTc^8bra0YQO<&I0 z*#|KJ6D1H#ygfkTCHU-viar=6CYngZL>?sSgT81)WAFhnJ{bIcRn@(7X2B;CW9*as zGdnZYRbTzTYkf7lb?{?Hj;iZ6uSxj3BuTz~$v^F}^1S4i#g)mftDAHoU;O94t+Jiw zx;l8!Px8WJKNTiCP%U(0riCe7y*zN_*vStaJGoh8FZ|s2&B(yWz>C4a#mT$gdCftY zY2VdvR+n9R`-?H@ILFAq$iT?J$iT?J$iT?J$iT?J$iV*y18sGFau>3m7v=5Z>r2*N zpNoG@{CsEa`}5=b{qgmwweREG@nvLSWME`qWME`qWME`qWME`qWME`qWME`qWZ=KR zfac!)m8|=BIq-A;-=F{g@tP!g7W@VL89W3&3f=^+0*}8sN$vsf1^dBU!IKv!$!EbC zumm$;!0W-27bVHBz@y+t-~sRv@CNX^S0%|;z?Z?zU<14zTne7KFiE}-9smpQ0dN&~ 
z4xxVyehj_=z79SQ9snN%hrk591N{CK$PXR__kz!X_kq8@JW0L>9syqiUj?^<3&0Ov z#xvkS@M&-tcsKaN1xfNHa0V>F0?fe)a4UEnxB^@V{`FGi2Y&>=1wRLmf^UI`!9(C4 za5p#wJ_!ziE5Xw=RDKBV2ls&n)ZidE0Imj?gNwj(G;jU}{sevl9s}P2Uj$zOhk@w# znZBvhI+b>HZOT?Pt!~Xk?JjhywrX9b3)84wQyZnXbeXx*Wb&;|7iy-{wo$byv@I3? zwmUq?nB)l4EL@jos&vd^RBj8~noO}y%bZPG+e(&lwVsk)@Cn}!lFFBbspqEQQ(LyCuDi-V$NL?f+iZBoWVV%2ZmTi%&hT;T zT&_}`=ki^ZBQ39p2{&z5`*$4=#5c)p3s<*O)+tz5e!#-B6$4?dT^LpB9Xr=sxq%T~ z8Rcfk9|V&fxbk|g-uLZOH@b4i)Qy)y=ju3nr?WPVROq_3fo&~{n9WQobZ#wK!#1#y z+J?Dq7#?t4o4NuSY#BGr@TxXD)^(yX-MG@0^Y8$g=8MdfDN=S-#fBQ?YL&VYZDm%p zY};Y1ZknzLoT|*p8Z8o32IhMamTFha{#RsEh0e?k$%<{nrPQ6BsV&nq9X6#K2$`yx z%X7Co?JBgBLtwAL+BVx>XK{ZFt<4>Q4Y3#bu4lvREk`pYkMDSc=(owJm0j>I&9yD$ zfyJ_N=&kX?k*o83GlvzgdJl%g@gVRI8ascXav8rRimt~Pm~X<@5oB7Q7Z z=S>yG&ya{!WlmSDht(8zUVE7*NWWvv?j$(F2|Kr$ zx<%GIDUNm2I~Mr^uUeF*3A>15qmWi3uZAXUxV7yv>S$Bl1_?H|XJ9GHR=}A!g1Efl zA8=k0aK(d8P@#Dj7JRoE|*$-)@tK}1|8eZ@okvZ;HGJh}e!-NNfvkjuGmvC2QF;-uHNaXJ^Ucag@p^srPzK55N7OjVm!+|`vR&g3TiD&P1Elh@`&Z&-8pID0#A>_#3yw1os^DxPYwcNWpN1qRskIiW1cx^GbJ_S%n)Pn#+TZyoUg@NSI>1R z$uvnKh2l2UebBSw%#vCy1qs}UOW<*PIHjTvg~zIsiJtJd9g#I-?~Rap3PonZg$ zDd*Jny_`YU(YMhOb)OVG;<`f)k+|Y_-KTN4%h^Fn^2SxvHLXq2$>3{shmuIL-u%UW zR*M)RMbcW5V6r}oir)s$3YAtYGJJ`&;vbty&u}!hjSa?Efq{NE%;_sYVyi2Z8S;tn zGxPZ`W~yW@vzXItgFRs<(F`@-9ciCtQi+!RB5MW;-9EJ(?`lk%jqOw6clU{$|F7VT z{8J$3|NXHC@dRi5yTP^KQt(>v3}^grf=_^tgZF@^INLu4eg+-^-v#%B`+x>-0nc*A ze;hmv?gGcb8^IHt@9zbN!P~%oa5=aP{DL$7q^w#~&LLN>lLnR)MT4u-PwCrdV za9EZ$LQaQFj@YtK>A%o<;Fdw!82$MsNpAG*M_(iRZnPXom||xA5yziVe2;HfOwJ*D z%j`{GD94r?y~Sg$rCwI*m}#dsb4bc4+V@u19F^(n=$el?3;4rNe%>=Ve4!8I;bqQQK-%b`@ufR1H>8Udy5+uG4@G@X!ymW& zDN0G_RQd)yxWh?{?e_<%TW&eZvDoY1AEws(N@5^%(&i=SP@6V>&ZyVnz$XXz;b`c$ z*SC7kw!hdWtf#&K#c(ec2fnG0#ro6Ofe`&lhwOQ~OtH@!ni_IknsI#~i~HFQgoc7# zG%URka@yBo@$^a#9X}!i`=&rVybjHs1Mxhya(Tq)+|dv0kARUePe8^}xvRVt3PDXl ze-5=(xm)%fwp5q&-erQgXwZZV4}$WathrR9nQ@3)6W>tSYm#f-2Y<()j><@BexLLm zF)`7z3&n%Fgac_b<2rmT&eNZcqm%V_GVm__+E6pn_*sqA#Ge*~kB>ZBta abaQx*c+v=hUwV-K0!frj94D5eqkjV=^2iwg literal 0 HcmV?d00001 diff 
--git a/docs/ops-warden-memory-contract.md b/docs/ops-warden-memory-contract.md new file mode 100644 index 0000000..68d4d9e --- /dev/null +++ b/docs/ops-warden-memory-contract.md @@ -0,0 +1,75 @@ +# Ops-Warden Cross-Runtime Memory Contract + +Updated: 2026-07-02 + +## Purpose + +phase-memory provides a shared experiential substrate for ops-warden across: + +- `warden worker` ticks with llm-connect / OpenRouter inference +- Coding agent sessions (Claude Code, Codex, Grok, future agents) +- Direct operator CLI use + +All runtimes use one canonical store and the same redaction rules. + +## Canonical Store + +| Setting | Default | +| --- | --- | +| Path | `~/.local/share/warden/memory/` | +| Override | `WARDEN_MEMORY_STORE` | +| Opt-out | `WARDEN_MEMORY=0` | + +Layout: + +- `metadata.json` — store schema and profile id +- `events.jsonl` — append-only session episodes + +## Session Kinds + +| Runtime | `session_kind` | Selection | +| --- | --- | --- | +| Worker | `warden.worker` | `WARDEN_SESSION_KIND=warden.worker` during worker ticks | +| Operator CLI | `warden.operator` | default when `WARDEN_AGENT_ID` is unset | +| Agent session | `warden.agent.<id>` | `WARDEN_AGENT_ID=claude\|codex\|grok\|...` | + +## Event Schema + +Schema: `phase_memory.ops_warden.session_event.v1` + +Required event fields: + +- `session_kind`, `command`, `outcome` (`resolved|escalated|skipped`) +- `need_fingerprint` (digest only — never the raw need text when sensitive) +- `route_id`, `agent_id`, `session_id`, `diagnostic_codes` + +Writes containing token-like values or raw URLs are rejected. + +## Activation + +`activate_ops_warden_memory` returns: + +- stabilized route matches (verified repeats) +- recent episodes filtered by session kind / need fingerprint +- `llm_calls_avoided` when stabilized memory covers the need + +Rigid memory (charter, allowlist) is import-only and never promoted from episodes. + +## Agent Session Orientation + +Coding agents should: + +1. 
Set `WARDEN_AGENT_ID` to their runtime id. +2. Run `warden memory activate --json` at session start. +3. Use normal `warden route` / `warden access` commands; episodes are recorded + automatically when memory is enabled. + +## Evaluation + +Ops-warden memory quality is measured by: + +- `routing_repeat_accuracy` +- `cross_runtime_continuity` +- `llm_calls_avoided_count` + +Use `evaluation_trend_regression_gate` when promoting worker or routing changes. \ No newline at end of file diff --git a/src/phase_memory/__init__.py b/src/phase_memory/__init__.py index 33e4a43..29b3b86 100644 --- a/src/phase_memory/__init__.py +++ b/src/phase_memory/__init__.py @@ -126,6 +126,30 @@ from .pilot import ( managed_deployment_pilot_report, write_live_pilot_evidence, ) +from .ops_warden import ( + KNOWN_AGENT_IDS, + OPS_WARDEN_ACTIVATION_SCHEMA, + OPS_WARDEN_ADAPTER_PACK_NAME, + OPS_WARDEN_MEMORY_STATUS_SCHEMA, + OPS_WARDEN_PROFILE_ID, + OPS_WARDEN_RUNTIME_SCHEMA, + OPS_WARDEN_SESSION_EVENT_SCHEMA, + OpsWardenMemoryStore, + activate_ops_warden_memory, + build_session_event, + default_memory_store_path, + memory_enabled, + ops_warden_adapter_pack, + ops_warden_coordination_profile, + ops_warden_evaluation_metrics, + ops_warden_evaluation_report, + record_session_event, + resolve_session_kind, + session_kind_for_agent, + stabilized_route_match, + validate_memory_write, + validate_ops_warden_profile, +) from .planner import plan_profile_execution from .runtime import PhaseMemoryRuntime from .troubleshooting import ( @@ -194,7 +218,15 @@ __all__ = [ "LIVE_PILOT_REPORT_SCHEMA", "LocalMarkitectValidator", "OptionalMarkitectValidator", + "OPS_WARDEN_ACTIVATION_SCHEMA", + "OPS_WARDEN_ADAPTER_PACK_NAME", + "OPS_WARDEN_MEMORY_STATUS_SCHEMA", + "OPS_WARDEN_PROFILE_ID", + "OPS_WARDEN_RUNTIME_SCHEMA", + "OPS_WARDEN_SESSION_EVENT_SCHEMA", + "OpsWardenMemoryStore", "PHASE_MEMORY_CREDENTIAL_NEEDS", + "KNOWN_AGENT_IDS", "abandon_path", "branch_path", "compact_path", @@ -234,7 +266,21 @@ __all__ = [ 
"package_request_from_selection", "package_response_envelope", "WordCountTokenEstimator", + "activate_ops_warden_memory", "activation_quality_report", + "build_session_event", + "default_memory_store_path", + "memory_enabled", + "ops_warden_adapter_pack", + "ops_warden_coordination_profile", + "ops_warden_evaluation_metrics", + "ops_warden_evaluation_report", + "record_session_event", + "resolve_session_kind", + "session_kind_for_agent", + "stabilized_route_match", + "validate_memory_write", + "validate_ops_warden_profile", "plan_neighborhood_activation", "retrieve_graph_neighborhood", "select_event_path", diff --git a/src/phase_memory/ops_warden.py b/src/phase_memory/ops_warden.py new file mode 100644 index 0000000..99421a1 --- /dev/null +++ b/src/phase_memory/ops_warden.py @@ -0,0 +1,497 @@ +"""ops-warden cross-runtime memory contract and activation helpers.""" + +from __future__ import annotations + +import json +import os +import re +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Mapping + +from .contracts import profile_from_markitect +from .external_adapters import ExternalAdapterPack, adapter_pack_manifest +from .models import Diagnostic +from .utils import stable_digest, utc_now_iso + +OPS_WARDEN_RUNTIME_SCHEMA = "phase_memory.ops_warden.runtime.v1" +OPS_WARDEN_SESSION_EVENT_SCHEMA = "phase_memory.ops_warden.session_event.v1" +OPS_WARDEN_ACTIVATION_SCHEMA = "phase_memory.ops_warden.activation.v1" +OPS_WARDEN_MEMORY_STATUS_SCHEMA = "phase_memory.ops_warden.memory_status.v1" +OPS_WARDEN_PROFILE_ID = "ops-warden-coordination" +OPS_WARDEN_ADAPTER_PACK_NAME = "ops-warden-coordination" + +SESSION_KIND_WORKER = "warden.worker" +SESSION_KIND_OPERATOR = "warden.operator" +SESSION_KIND_AGENT_PREFIX = "warden.agent." 
+ +KNOWN_AGENT_IDS = ("claude", "codex", "grok") + +SECRET_FIELD_PATTERN = re.compile( + r"\b(" + r"token|secret|password|api[_ ]?key|private[_ ]?key|" + r"vault[_ ]?token|npm_auth_token|client[_ ]?secret|" + r"bearer\s+[a-z0-9._-]{8,}|" + r"https?://[^\s\"']+" + r")\b", + re.IGNORECASE, +) + +_OUTCOMES = frozenset({"resolved", "escalated", "skipped"}) + + +def default_memory_store_path(environ: Mapping[str, str] | None = None) -> Path: + environ = environ or os.environ + override = str(environ.get("WARDEN_MEMORY_STORE") or "").strip() + if override: + return Path(override).expanduser() + xdg_data = str(environ.get("XDG_DATA_HOME") or "").strip() + base = Path(xdg_data).expanduser() if xdg_data else Path.home() / ".local" / "share" + return base / "warden" / "memory" + + +def memory_enabled(environ: Mapping[str, str] | None = None) -> bool: + environ = environ or os.environ + return str(environ.get("WARDEN_MEMORY", "1")).strip().lower() not in {"0", "false", "no", "off"} + + +def session_kind_for_agent(agent_id: str | None = None) -> str: + agent_id = str(agent_id or "").strip().lower() + if agent_id: + return f"{SESSION_KIND_AGENT_PREFIX}{agent_id}" + return SESSION_KIND_OPERATOR + + +def resolve_session_kind(environ: Mapping[str, str] | None = None) -> str: + environ = environ or os.environ + explicit = str(environ.get("WARDEN_SESSION_KIND") or "").strip() + if explicit: + return explicit + agent_id = str(environ.get("WARDEN_AGENT_ID") or "").strip() + return session_kind_for_agent(agent_id or None) + + +def ops_warden_coordination_profile(path: str | Path | None = None) -> dict[str, Any]: + if path is None: + path = Path(__file__).resolve().parents[2] / "tests" / "fixtures" / "ops-warden-coordination-profile.json" + return json.loads(Path(path).read_text(encoding="utf-8")) + + +def validate_ops_warden_profile(data: dict[str, Any]) -> dict[str, Any]: + result = profile_from_markitect(data) + diagnostics = list(result.diagnostics) + policy = data.get("policy") if 
isinstance(data.get("policy"), dict) else {} + if policy.get("secrets_allowed") is not False: + diagnostics.append( + Diagnostic( + "error", + "ops_warden_profile_secrets_not_denied", + "Ops-warden coordination profile must deny secret storage.", + "policy.secrets_allowed", + ) + ) + activation = data.get("activation") if isinstance(data.get("activation"), dict) else {} + if int(activation.get("max_items") or 0) < 1: + diagnostics.append( + Diagnostic( + "error", + "ops_warden_profile_activation_budget_missing", + "Ops-warden coordination profile must declare activation budgets.", + "activation.max_items", + ) + ) + return { + "schema_version": OPS_WARDEN_RUNTIME_SCHEMA, + "valid": not any(item.severity == "error" for item in diagnostics), + "profile_id": str(data.get("id") or ""), + "diagnostics": [item.to_dict() for item in diagnostics], + } + + +def validate_memory_write(payload: Mapping[str, Any]) -> tuple[bool, tuple[Diagnostic, ...]]: + diagnostics: list[Diagnostic] = [] + serialized = json.dumps(dict(payload), sort_keys=True) + if SECRET_FIELD_PATTERN.search(serialized): + diagnostics.append( + Diagnostic( + "error", + "ops_warden_memory_secret_field_rejected", + "Memory write appears to contain secret values or raw endpoint URLs.", + "payload", + ) + ) + outcome = str(payload.get("outcome") or "") + if outcome and outcome not in _OUTCOMES: + diagnostics.append( + Diagnostic( + "error", + "ops_warden_memory_invalid_outcome", + "Session event outcome must be resolved, escalated, or skipped.", + "outcome", + {"actual": outcome}, + ) + ) + return not diagnostics, tuple(diagnostics) + + +def need_fingerprint(need: str) -> str: + return stable_digest(str(need or "").strip().lower()) + + +@dataclass(frozen=True) +class OpsWardenMemoryStore: + root: Path + + @classmethod + def open(cls, path: str | Path | None = None, environ: Mapping[str, str] | None = None) -> OpsWardenMemoryStore: + store = cls((Path(path) if path is not None else 
default_memory_store_path(environ)).expanduser()) + store.ensure_layout() + return store + + def ensure_layout(self) -> None: + self.root.mkdir(parents=True, exist_ok=True) + if not self.metadata_path.exists(): + self.metadata_path.write_text( + json.dumps( + { + "schema_version": OPS_WARDEN_RUNTIME_SCHEMA, + "profile_id": OPS_WARDEN_PROFILE_ID, + "created_at": utc_now_iso(), + }, + indent=2, + sort_keys=True, + ) + + "\n", + encoding="utf-8", + ) + + @property + def metadata_path(self) -> Path: + return self.root / "metadata.json" + + @property + def events_path(self) -> Path: + return self.root / "events.jsonl" + + def list_events(self) -> list[dict[str, Any]]: + if not self.events_path.exists(): + return [] + events: list[dict[str, Any]] = [] + for line in self.events_path.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + data = json.loads(line) + if isinstance(data, dict): + events.append(data) + return events + + def append_event(self, event: Mapping[str, Any]) -> dict[str, Any]: + payload = dict(event) + payload.setdefault("schema_version", OPS_WARDEN_SESSION_EVENT_SCHEMA) + payload.setdefault("recorded_at", utc_now_iso()) + ok, diagnostics = validate_memory_write(payload) + if not ok: + return { + "valid": False, + "diagnostics": [item.to_dict() for item in diagnostics], + } + event_id = str(payload.get("event_id") or f"ops-warden-event:{stable_digest(payload)}") + payload["event_id"] = event_id + with self.events_path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(payload, sort_keys=True) + "\n") + return {"valid": True, "event": payload, "diagnostics": []} + + def status(self) -> dict[str, Any]: + events = self.list_events() + counts: dict[str, int] = {} + last_activation = "" + for event in events: + kind = str(event.get("session_kind") or "unknown") + counts[kind] = counts.get(kind, 0) + 1 + if event.get("command") == "memory.activate": + last_activation = str(event.get("recorded_at") or last_activation) + 
return { + "schema_version": OPS_WARDEN_MEMORY_STATUS_SCHEMA, + "valid": True, + "store_path": str(self.root), + "profile_id": OPS_WARDEN_PROFILE_ID, + "episode_count": len(events), + "episode_counts_by_session_kind": dict(sorted(counts.items())), + "last_activation_at": last_activation, + "diagnostics": [], + } + + +def build_session_event( + *, + command: str, + session_kind: str, + outcome: str, + need: str = "", + route_id: str = "", + agent_id: str = "", + session_id: str = "", + diagnostic_codes: list[str] | None = None, + metadata: Mapping[str, Any] | None = None, +) -> dict[str, Any]: + return { + "schema_version": OPS_WARDEN_SESSION_EVENT_SCHEMA, + "session_kind": session_kind, + "agent_id": agent_id, + "session_id": session_id, + "command": command, + "need_fingerprint": need_fingerprint(need) if need else "", + "route_id": route_id, + "outcome": outcome, + "diagnostic_codes": list(diagnostic_codes or ()), + "metadata": dict(metadata or {}), + } + + +def record_session_event( + store: OpsWardenMemoryStore | Path | str, + event: Mapping[str, Any], +) -> dict[str, Any]: + memory = store if isinstance(store, OpsWardenMemoryStore) else OpsWardenMemoryStore.open(store) + return memory.append_event(event) + + +def stabilized_route_match( + events: list[dict[str, Any]], + *, + need: str = "", + need_fingerprint_value: str = "", + min_confirmations: int = 2, +) -> dict[str, Any] | None: + fingerprint = need_fingerprint_value or (need_fingerprint(need) if need else "") + if not fingerprint: + return None + matches = [ + event + for event in events + if event.get("need_fingerprint") == fingerprint + and event.get("outcome") == "resolved" + and event.get("route_id") + ] + if len(matches) < min_confirmations: + return None + route_id = str(matches[-1].get("route_id") or "") + return { + "need_fingerprint": fingerprint, + "route_id": route_id, + "confirmations": len(matches), + "source_event_ids": [str(item.get("event_id") or "") for item in 
matches[-min_confirmations:]], + } + + +def activate_ops_warden_memory( + store: OpsWardenMemoryStore | Path | str, + *, + session_kind: str, + need: str = "", + session_id: str = "", +) -> dict[str, Any]: + memory = store if isinstance(store, OpsWardenMemoryStore) else OpsWardenMemoryStore.open(store) + profile = ops_warden_coordination_profile() + profile_validation = validate_ops_warden_profile(profile) + events = memory.list_events() + fingerprint = need_fingerprint(need) if need else "" + stabilized = stabilized_route_match(events, need_fingerprint_value=fingerprint) if fingerprint else None + recent = list(reversed(events))[: int(profile.get("activation", {}).get("max_items") or 8)] + selected: list[dict[str, Any]] = [] + if stabilized: + selected.append( + { + "kind": "stabilized_route", + "route_id": stabilized["route_id"], + "need_fingerprint": stabilized["need_fingerprint"], + "confirmations": stabilized["confirmations"], + } + ) + for event in recent: + if len(selected) >= int(profile.get("activation", {}).get("max_items") or 8): + break + if fingerprint and event.get("need_fingerprint") != fingerprint and event.get("session_kind") != session_kind: + continue + selected.append( + { + "kind": "episode", + "event_id": event.get("event_id", ""), + "session_kind": event.get("session_kind", ""), + "command": event.get("command", ""), + "route_id": event.get("route_id", ""), + "outcome": event.get("outcome", ""), + "diagnostic_codes": list(event.get("diagnostic_codes") or ()), + } + ) + activation = { + "schema_version": OPS_WARDEN_ACTIVATION_SCHEMA, + "id": f"ops-warden-activation:{stable_digest([session_kind, fingerprint, selected])}", + "valid": profile_validation["valid"], + "session_kind": session_kind, + "session_id": session_id, + "profile_id": OPS_WARDEN_PROFILE_ID, + "need_fingerprint": fingerprint, + "stabilized_route": stabilized, + "selected_episodes": selected, + "episode_count": len(events), + "llm_calls_avoided": bool(stabilized), + 
"operator_guidance": { + "worker": "Activate before Brain.plan; record after execute.", + "agent_session": "Call warden memory activate at session start.", + "operator": "WARDEN_AGENT_ID selects warden.agent. session_kind.", + }, + "diagnostics": list(profile_validation.get("diagnostics", ())), + } + memory.append_event( + build_session_event( + command="memory.activate", + session_kind=session_kind, + outcome="resolved", + session_id=session_id, + metadata={"selected_count": len(selected), "llm_calls_avoided": bool(stabilized)}, + ) + ) + return activation + + +def ops_warden_evaluation_report(scenarios_data: dict[str, Any]) -> dict[str, Any]: + scenarios = list(scenarios_data.get("scenarios") or ()) + scenario_reports: list[dict[str, Any]] = [] + metrics = { + "scenario_count": len(scenarios), + "routing_repeat_accuracy": 0.0, + "cross_runtime_continuity": 0.0, + "llm_calls_avoided_count": 0.0, + } + for scenario in scenarios: + events = list(scenario.get("events") or ()) + episode_metrics = ops_warden_evaluation_metrics(events) + expect = dict(scenario.get("expect") or {}) + diagnostics: list[dict[str, Any]] = [] + if expect.get("stabilized") and not stabilized_route_match(events, need_fingerprint_value=str(events[0].get("need_fingerprint") or "")): + diagnostics.append( + Diagnostic( + "error", + "ops_warden_stabilized_route_missing", + "Scenario expected a stabilized route match.", + "events", + ).to_dict() + ) + if expect.get("continuity") and episode_metrics["cross_runtime_continuity"] < 1.0: + diagnostics.append( + Diagnostic( + "error", + "ops_warden_cross_runtime_continuity_missing", + "Scenario expected cross-runtime continuity.", + "events", + ).to_dict() + ) + if expect.get("llm_calls_avoided") and episode_metrics["llm_calls_avoided_count"] < 1.0: + diagnostics.append( + Diagnostic( + "error", + "ops_warden_llm_avoidance_missing", + "Scenario expected an avoided llm-connect call.", + "events", + ).to_dict() + ) + scenario_reports.append( + { + "id": 
scenario.get("id", ""), + "metrics": episode_metrics, + "diagnostics": diagnostics, + } + ) + for key in ("routing_repeat_accuracy", "cross_runtime_continuity", "llm_calls_avoided_count"): + if key in episode_metrics: + metrics[key] = max(float(metrics[key]), float(episode_metrics[key])) + return { + "schema_version": "phase_memory.ops_warden.evaluation_report.v1", + "valid": not any(item.get("severity") == "error" for report in scenario_reports for item in report.get("diagnostics", ())), + "metrics": metrics, + "scenarios": scenario_reports, + "diagnostics": [ + item + for report in scenario_reports + for item in report.get("diagnostics", ()) + if item.get("severity") == "error" + ], + } + + +def ops_warden_evaluation_metrics(events: list[dict[str, Any]]) -> dict[str, float]: + total = float(len(events)) + resolved = float(sum(1 for event in events if event.get("outcome") == "resolved")) + escalated = float(sum(1 for event in events if event.get("outcome") == "escalated")) + activations = float(sum(1 for event in events if event.get("command") == "memory.activate")) + llm_avoided = float(sum(1 for event in events if (event.get("metadata") or {}).get("llm_calls_avoided"))) + agent_sessions = float( + sum(1 for event in events if str(event.get("session_kind") or "").startswith(SESSION_KIND_AGENT_PREFIX)) + ) + worker_sessions = float(sum(1 for event in events if event.get("session_kind") == SESSION_KIND_WORKER)) + continuity = 1.0 if agent_sessions > 0 and worker_sessions > 0 else 0.0 + return { + "episode_count": total, + "resolved_episode_count": resolved, + "escalated_episode_count": escalated, + "activation_count": activations, + "llm_calls_avoided_count": llm_avoided, + "cross_runtime_continuity": continuity, + "routing_repeat_accuracy": (resolved / total) if total else 0.0, + } + + +def ops_warden_adapter_pack() -> ExternalAdapterPack: + from .adapters import InMemoryMemoryEventLog, InMemoryMemoryGraphStore + from .external_adapters import ( + 
ADAPTER_CONFORMANCE_HELPERS, + FakeExternalPolicyGateway, + FakeKontextualRuntimeRegistry, + FakeMarkitectPackageCompiler, + FakeTelemetryAuditSink, + LiveShapedPermissionSemanticIndex, + ) + + class OpsWardenEventLog(InMemoryMemoryEventLog): + """Event log shape for ops-warden coordination episodes.""" + + class OpsWardenGraphStore(InMemoryMemoryGraphStore): + """Graph store shape for stabilized routing nodes.""" + + capability_requirements = { + "graph_store": ("ops-warden.coordination.graph",), + "event_log": ("ops-warden.coordination.events",), + "policy_gateway": ("ops-warden.coordination.policy",), + "audit_sink": ("ops-warden.coordination.audit",), + "package_compiler": ("ops-warden.coordination.activation",), + "semantic_index": ("ops-warden.coordination.retrieval",), + "runtime_registry": ("ops-warden.coordination.registry",), + } + ownership_boundaries = { + "graph_store": "phase-memory owns stabilized routing nodes; ops-warden owns episode ingestion", + "event_log": "phase-memory owns coordination episodes; ops-warden owns CLI/worker writers", + "policy_gateway": "ops-warden charter is rigid import-only; phase-memory never relaxes guardrails", + "audit_sink": "phase-memory owns redacted audit schema; ops-warden owns warden activity correlation", + "package_compiler": "phase-memory owns activation packages for worker and agent sessions", + "semantic_index": "phase-memory owns need-fingerprint retrieval for routing memory", + "runtime_registry": "ops-warden owns session_kind routing; phase-memory owns runtime envelopes", + } + adapters = { + "graph_store": OpsWardenGraphStore(), + "event_log": OpsWardenEventLog(), + "policy_gateway": FakeExternalPolicyGateway(), + "audit_sink": FakeTelemetryAuditSink(), + "package_compiler": FakeMarkitectPackageCompiler(), + "semantic_index": LiveShapedPermissionSemanticIndex(), + "runtime_registry": FakeKontextualRuntimeRegistry(), + } + return ExternalAdapterPack( + name=OPS_WARDEN_ADAPTER_PACK_NAME, + adapters=adapters, 
+ capabilities=tuple(sorted({capability for values in capability_requirements.values() for capability in values})), + ownership_boundaries=ownership_boundaries, + required_conformance=dict(ADAPTER_CONFORMANCE_HELPERS), + capability_requirements=capability_requirements, + metadata={"profile_id": OPS_WARDEN_PROFILE_ID, "runtime_schema": OPS_WARDEN_RUNTIME_SCHEMA}, + ) \ No newline at end of file diff --git a/tests/fixtures/ops-warden-coordination-profile.json b/tests/fixtures/ops-warden-coordination-profile.json new file mode 100644 index 0000000..310e854 --- /dev/null +++ b/tests/fixtures/ops-warden-coordination-profile.json @@ -0,0 +1,46 @@ +{ + "schema_version": "markitect.memory.profile.v1", + "id": "ops-warden-coordination", + "title": "Ops-Warden Coordination Memory", + "intent": "Retain routing, coordination, escalation, and pilot-feedback episodes across worker, agent session, and operator CLI runtimes.", + "memory_kinds": ["reasoning", "conversation", "knowledge", "package"], + "stores": { + "reasoning": "local-graph-store", + "conversation": "local-event-log", + "knowledge": "local-graph-store", + "package": "markitect-context-package" + }, + "limits": { + "reasoning": {"max_nodes": 200}, + "conversation": {"max_nodes": 100}, + "package": {"max_items": 8} + }, + "retention": { + "conversation": {"stale_after_days": 14, "delete_after_days": 90} + }, + "refresh": { + "trigger": "routing-catalog-or-playbook-change" + }, + "compaction": { + "strategy": "summarize-coordination-thread-after-review" + }, + "activation": { + "max_items": 8, + "max_tokens": 2000 + }, + "policy": { + "required_labels": ["coordination-local"], + "durable_writes": "review-gated", + "secrets_allowed": false + }, + "observability": { + "emit_events": true + }, + "failure": { + "missing_runtime_store": "degrade-to-dry-run" + }, + "metadata": { + "rigid_import_only": ["charter", "action_allowlist"], + "runtime_modes": ["warden.worker", "warden.operator", "warden.agent.*"] + } +} \ No 
newline at end of file diff --git a/tests/fixtures/ops-warden-evaluation-scenarios.json b/tests/fixtures/ops-warden-evaluation-scenarios.json new file mode 100644 index 0000000..2f56b38 --- /dev/null +++ b/tests/fixtures/ops-warden-evaluation-scenarios.json @@ -0,0 +1,31 @@ +{ + "schema_version": "phase_memory.evaluation.scenarios.v1", + "scenarios": [ + { + "id": "ops-warden-routing-repeat", + "events": [ + {"session_kind": "warden.agent.grok", "command": "route find", "need_fingerprint": "abc123", "route_id": "openbao-api-key", "outcome": "resolved"}, + {"session_kind": "warden.agent.grok", "command": "route find", "need_fingerprint": "abc123", "route_id": "openbao-api-key", "outcome": "resolved"}, + {"session_kind": "warden.worker", "command": "worker run", "need_fingerprint": "abc123", "route_id": "openbao-api-key", "outcome": "resolved"} + ], + "expect": {"stabilized": true, "continuity": true} + }, + { + "id": "ops-warden-cross-runtime-continuity", + "events": [ + {"session_kind": "warden.agent.claude", "command": "route find", "outcome": "resolved"}, + {"session_kind": "warden.worker", "command": "worker run", "outcome": "escalated"} + ], + "expect": {"continuity": true} + }, + { + "id": "ops-warden-llm-avoidance", + "events": [ + {"session_kind": "warden.operator", "command": "route find", "need_fingerprint": "def456", "route_id": "ops-bridge-tunnel", "outcome": "resolved"}, + {"session_kind": "warden.operator", "command": "route find", "need_fingerprint": "def456", "route_id": "ops-bridge-tunnel", "outcome": "resolved"}, + {"session_kind": "warden.worker", "command": "memory.activate", "outcome": "resolved", "metadata": {"llm_calls_avoided": true}} + ], + "expect": {"llm_calls_avoided": true} + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/public-api-snapshot.json b/tests/fixtures/public-api-snapshot.json index 92f3f80..3b5cbde 100644 --- a/tests/fixtures/public-api-snapshot.json +++ b/tests/fixtures/public-api-snapshot.json @@ -24,6 
+24,7 @@ "FakeKontextualRuntimeRegistry", "FakeMarkitectPackageCompiler", "FakeTelemetryAuditSink", + "KNOWN_AGENT_IDS", "LIVE_PILOT_REPORT_SCHEMA", "LifecycleAction", "LifecycleActionKind", @@ -52,6 +53,13 @@ "MemoryPath", "MemoryPathState", "MemoryPhase", + "OPS_WARDEN_ACTIVATION_SCHEMA", + "OPS_WARDEN_ADAPTER_PACK_NAME", + "OPS_WARDEN_MEMORY_STATUS_SCHEMA", + "OPS_WARDEN_PROFILE_ID", + "OPS_WARDEN_RUNTIME_SCHEMA", + "OPS_WARDEN_SESSION_EVENT_SCHEMA", + "OpsWardenMemoryStore", "OptionalMarkitectValidator", "PHASE_MEMORY_CREDENTIAL_NEEDS", "POLICY_OPERATION_POINTS", @@ -77,10 +85,12 @@ "WARDEN_ROUTE_FIND_QUERY", "WordCountTokenEstimator", "abandon_path", + "activate_ops_warden_memory", "activation_quality_report", "adapter_pack_manifest", "branch_path", "build_service_binding", + "build_session_event", "compact_path", "create_path", "create_wsgi_app", @@ -88,6 +98,7 @@ "credentialed_drill_config_from_env", "credentialed_operator_report", "credentialed_telemetry_retention_drill", + "default_memory_store_path", "evaluation_threshold_report", "evaluation_trend_artifact", "evaluation_trend_history", @@ -102,9 +113,14 @@ "make_review_record", "managed_deployment_manifest", "managed_deployment_pilot_report", + "memory_enabled", "merge_path", "missing_credentialed_adapter_env", "operator_troubleshooting_matrix", + "ops_warden_adapter_pack", + "ops_warden_coordination_profile", + "ops_warden_evaluation_metrics", + "ops_warden_evaluation_report", "package_request_from_selection", "package_response_envelope", "path_event", @@ -119,17 +135,23 @@ "plan_retention", "plan_retention_from_rules", "profile_from_markitect", + "record_session_event", "resolve_credentialed_environ", "resolve_runtime_adapters", + "resolve_session_kind", "retrieve_graph_neighborhood", "runtime_from_config", "select_event_path", "service_app_metadata", "service_binding_from_config", "service_contracts", + "session_kind_for_agent", + "stabilized_route_match", "validate_adapter_pack_manifest", 
"validate_managed_deployment_manifest", + "validate_memory_write", "validate_operator_troubleshooting_matrix", + "validate_ops_warden_profile", "warden_access_advisory", "warden_cli_available", "warden_credential_routing_advisory", diff --git a/tests/test_ops_warden_memory.py b/tests/test_ops_warden_memory.py new file mode 100644 index 0000000..b7218c7 --- /dev/null +++ b/tests/test_ops_warden_memory.py @@ -0,0 +1,150 @@ +import json +from pathlib import Path + +from phase_memory.ops_warden import ( + OPS_WARDEN_ACTIVATION_SCHEMA, + OPS_WARDEN_MEMORY_STATUS_SCHEMA, + OPS_WARDEN_PROFILE_ID, + OPS_WARDEN_RUNTIME_SCHEMA, + OPS_WARDEN_SESSION_EVENT_SCHEMA, + activate_ops_warden_memory, + build_session_event, + memory_enabled, + ops_warden_adapter_pack, + ops_warden_coordination_profile, + ops_warden_evaluation_metrics, + record_session_event, + resolve_session_kind, + session_kind_for_agent, + stabilized_route_match, + validate_memory_write, + validate_ops_warden_profile, +) +from phase_memory.external_adapters import validate_adapter_pack_manifest + +FIXTURES = Path(__file__).parent / "fixtures" + + +def test_ops_warden_profile_validates_with_activation_budget() -> None: + profile = ops_warden_coordination_profile(FIXTURES / "ops-warden-coordination-profile.json") + validation = validate_ops_warden_profile(profile) + + assert profile["id"] == OPS_WARDEN_PROFILE_ID + assert profile["policy"]["secrets_allowed"] is False + assert validation["valid"] is True + + +def test_validate_memory_write_rejects_secret_like_payload() -> None: + ok, diagnostics = validate_memory_write( + { + "command": "access", + "outcome": "resolved", + "metadata": {"token": "super-secret-token"}, + } + ) + + assert ok is False + assert diagnostics[0].code == "ops_warden_memory_secret_field_rejected" + + +def test_session_kind_supports_known_and_future_agents() -> None: + assert session_kind_for_agent("grok") == "warden.agent.grok" + assert session_kind_for_agent(None) == "warden.operator" + assert 
resolve_session_kind({"WARDEN_AGENT_ID": "claude"}) == "warden.agent.claude" + assert resolve_session_kind({"WARDEN_SESSION_KIND": "warden.worker"}) == "warden.worker" + + +def test_record_and_activate_ops_warden_memory_round_trip(tmp_path) -> None: + store = tmp_path / "memory" + record_session_event( + store, + build_session_event( + command="route find", + session_kind="warden.agent.grok", + outcome="resolved", + need="openrouter api key", + route_id="openrouter-llm-connect", + agent_id="grok", + session_id="sess-1", + ), + ) + record_session_event( + store, + build_session_event( + command="route find", + session_kind="warden.agent.grok", + outcome="resolved", + need="openrouter api key", + route_id="openrouter-llm-connect", + agent_id="grok", + session_id="sess-1", + ), + ) + + activation = activate_ops_warden_memory( + store, + session_kind="warden.worker", + need="openrouter api key", + session_id="worker-1", + ) + + assert activation["schema_version"] == OPS_WARDEN_ACTIVATION_SCHEMA + assert activation["stabilized_route"]["route_id"] == "openrouter-llm-connect" + assert activation["llm_calls_avoided"] is True + + +def test_cross_runtime_continuity_is_visible_to_worker_activation(tmp_path) -> None: + store = tmp_path / "memory" + record_session_event( + store, + build_session_event( + command="route find", + session_kind="warden.agent.codex", + outcome="resolved", + need="npm token", + route_id="openbao-api-key", + agent_id="codex", + ), + ) + activation = activate_ops_warden_memory(store, session_kind="warden.worker", need="npm token") + + kinds = {item.get("session_kind") for item in activation["selected_episodes"] if item.get("kind") == "episode"} + assert "warden.agent.codex" in kinds + + +def test_ops_warden_evaluation_metrics_and_adapter_pack() -> None: + scenarios = json.loads((FIXTURES / "ops-warden-evaluation-scenarios.json").read_text(encoding="utf-8")) + events = scenarios["scenarios"][0]["events"] + metrics = ops_warden_evaluation_metrics(events) 
+ pack = ops_warden_adapter_pack() + diagnostics = validate_adapter_pack_manifest(pack) + + assert metrics["cross_runtime_continuity"] == 1.0 + assert metrics["routing_repeat_accuracy"] > 0 + assert not any(item.severity == "error" for item in diagnostics) + assert pack.name == "ops-warden-coordination" + + +def test_stabilized_route_match_requires_confirmations() -> None: + events = [ + {"need_fingerprint": "abc", "route_id": "openbao-api-key", "outcome": "resolved"}, + {"need_fingerprint": "abc", "route_id": "openbao-api-key", "outcome": "resolved"}, + ] + + assert stabilized_route_match(events, need_fingerprint_value="abc")["route_id"] == "openbao-api-key" + assert stabilized_route_match(events[:1], need_fingerprint_value="abc") is None + + +def test_memory_enabled_honors_opt_out() -> None: + assert memory_enabled({"WARDEN_MEMORY": "1"}) is True + assert memory_enabled({"WARDEN_MEMORY": "0"}) is False + + +def test_ops_warden_evaluation_report_passes_fixture_scenarios() -> None: + from phase_memory.ops_warden import ops_warden_evaluation_report + + scenarios = json.loads((FIXTURES / "ops-warden-evaluation-scenarios.json").read_text(encoding="utf-8")) + report = ops_warden_evaluation_report(scenarios) + + assert report["valid"] is True + assert report["metrics"]["scenario_count"] == 3 \ No newline at end of file diff --git a/workplans/PMEM-WP-0016-ops-warden-cross-runtime-memory.md b/workplans/PMEM-WP-0016-ops-warden-cross-runtime-memory.md index 45e7162..14be485 100644 --- a/workplans/PMEM-WP-0016-ops-warden-cross-runtime-memory.md +++ b/workplans/PMEM-WP-0016-ops-warden-cross-runtime-memory.md @@ -4,11 +4,12 @@ type: workplan title: "Ops-Warden Cross-Runtime Memory Profile And Contracts" domain: communication repo: phase-memory -status: proposed +status: finished owner: codex topic_slug: phase-memory created: "2026-07-02" updated: "2026-07-02" +state_hub_workstream_id: "0f817f83-bb43-4e60-b3a7-b7cca0e9e6ed" --- # PMEM-WP-0016: Ops-Warden Cross-Runtime Memory 
Profile And Contracts @@ -81,8 +82,9 @@ activation budget — not in storage location. ```task id: PMEM-WP-0016-T01 -status: todo +status: done priority: high +state_hub_task_id: "53dbc3f0-d2cb-4e0a-b558-64166ab976f3" ``` Author the `ops-warden-coordination` memory profile covering routing, @@ -102,8 +104,9 @@ Acceptance: ```task id: PMEM-WP-0016-T02 -status: todo +status: done priority: high +state_hub_task_id: "6933918e-3ae0-4e0b-aade-1fbcf88ef0a1" ``` Define the `phase_memory.ops_warden.runtime.v1` contract: canonical store path @@ -123,8 +126,9 @@ Acceptance: ```task id: PMEM-WP-0016-T03 -status: todo +status: done priority: high +state_hub_task_id: "9d436ccb-11df-49f1-82bd-eb75b1834018" ``` Define metadata-only event schemas for coding agent sessions (Claude, Codex, @@ -143,8 +147,9 @@ Acceptance: ```task id: PMEM-WP-0016-T04 -status: todo +status: done priority: high +state_hub_task_id: "2f1f65a3-9a82-4854-83bb-20ef5eb3bac8" ``` Build an ops-warden activation pack that retrieves relevant coordination @@ -164,8 +169,9 @@ Acceptance: ```task id: PMEM-WP-0016-T05 -status: todo +status: done priority: medium +state_hub_task_id: "ba0d35b3-2de1-483f-9761-cec5cbe3e8da" ``` Add evaluation scenarios and trend metrics for ops-warden memory quality across @@ -184,8 +190,9 @@ Acceptance: ```task id: PMEM-WP-0016-T06 -status: todo +status: done priority: medium +state_hub_task_id: "d430906b-6852-4328-8bc9-ad8c79fa8b03" ``` Publish a live-shaped ops-warden adapter pack and conformance suite for @@ -217,4 +224,13 @@ Acceptance: ## Closure Review -Pending implementation. \ No newline at end of file +Implemented cross-runtime memory contracts and activation helpers: + +- `ops-warden-coordination` profile fixture with activation budgets and + `secrets_allowed: false`. +- `phase_memory.ops_warden.runtime.v1` contract with canonical store path, + `session_kind` coverage, and secret-field rejection. +- Agent session event schema supporting claude, codex, grok, and future agent ids. 
+- `activate_ops_warden_memory`, stabilized route matching, and ops-warden adapter + pack with evaluation scenarios and report helpers. +- Contract documentation in `docs/ops-warden-memory-contract.md`. \ No newline at end of file