generated from coulomb/repo-seed
Implement refinement hardening workplan
This commit is contained in:
170
tests/fixtures/evaluation-scenarios.json
vendored
Normal file
170
tests/fixtures/evaluation-scenarios.json
vendored
Normal file
@@ -0,0 +1,170 @@
|
||||
{
|
||||
"schema_version": "phase_memory.evaluation_scenarios.v1",
|
||||
"scenarios": [
|
||||
{
|
||||
"id": "policy-denied-activation",
|
||||
"profile": {
|
||||
"schema_version": "markitect.memory.profile.v1",
|
||||
"id": "eval-policy-profile",
|
||||
"memory_kinds": ["knowledge", "decision"],
|
||||
"activation": {"max_items": 4, "max_tokens": 60},
|
||||
"policy": {"mode": "allow-all", "trust_zone_labels": ["local"]},
|
||||
"observability": {"audit_sink": "recording"}
|
||||
},
|
||||
"graph": {
|
||||
"schema_version": "markitect.memory.graph.v1",
|
||||
"id": "eval-policy-graph",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "policy.public",
|
||||
"kind": "knowledge",
|
||||
"text": "Public operating constraint that can be activated for local planning.",
|
||||
"phase": "stabilized",
|
||||
"policy": {"labels": ["public"], "trust_zone": "local"},
|
||||
"source_spans": [{"path": "policy.md", "line_start": 1}],
|
||||
"metadata": {"graph_id": "eval-policy-graph"}
|
||||
},
|
||||
{
|
||||
"id": "policy.secret",
|
||||
"kind": "knowledge",
|
||||
"text": "Sensitive credential note that must not enter restart context.",
|
||||
"phase": "stabilized",
|
||||
"policy": {"labels": ["restricted"], "trust_zone": "local", "secret": true},
|
||||
"metadata": {"graph_id": "eval-policy-graph"}
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge.policy",
|
||||
"kind": "references",
|
||||
"source": "policy.public",
|
||||
"target": "policy.secret"
|
||||
}
|
||||
],
|
||||
"events": []
|
||||
},
|
||||
"expect": {"denied_node_ids": ["policy.secret"]}
|
||||
},
|
||||
{
|
||||
"id": "profile-lifecycle-rules",
|
||||
"profile": {
|
||||
"schema_version": "markitect.memory.profile.v1",
|
||||
"id": "eval-lifecycle-profile",
|
||||
"memory_kinds": ["episode", "decision"],
|
||||
"retention": {
|
||||
"episode": {"stale_after_days": 7},
|
||||
"decision": {"delete_after_days": 365}
|
||||
},
|
||||
"refresh": {"mode": "enabled"},
|
||||
"compaction": {"node_ids": ["life.old-episode"]},
|
||||
"metadata": {
|
||||
"phase_transitions": [
|
||||
{
|
||||
"node_kind": "decision",
|
||||
"from_phase": "fluid",
|
||||
"to_phase": "stabilized",
|
||||
"min_age_days": 2,
|
||||
"reason": "decision has stabilized"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"graph": {
|
||||
"schema_version": "markitect.memory.graph.v1",
|
||||
"id": "eval-lifecycle-graph",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "life.old-episode",
|
||||
"kind": "episode",
|
||||
"text": "An old episode ready to become stale and compacted.",
|
||||
"phase": "fluid",
|
||||
"freshness": {"updated_at": "2026-04-01T00:00:00+00:00", "source_digest": "old"},
|
||||
"metadata": {"graph_id": "eval-lifecycle-graph"}
|
||||
},
|
||||
{
|
||||
"id": "life.decision",
|
||||
"kind": "decision",
|
||||
"text": "A decision that should transition to stabilized after review.",
|
||||
"phase": "fluid",
|
||||
"freshness": {"updated_at": "2026-05-01T00:00:00+00:00", "source_digest": "decision-old"},
|
||||
"metadata": {"graph_id": "eval-lifecycle-graph"}
|
||||
}
|
||||
],
|
||||
"edges": [],
|
||||
"events": []
|
||||
},
|
||||
"expect": {
|
||||
"actions": [
|
||||
["life.old-episode", "mark_stale"],
|
||||
["life.decision", "transition_phase"],
|
||||
["life.decision", "refresh"]
|
||||
],
|
||||
"compact_source": "life.old-episode"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "budget-path-and-semantic-hints",
|
||||
"profile": {
|
||||
"schema_version": "markitect.memory.profile.v1",
|
||||
"id": "eval-budget-profile",
|
||||
"memory_kinds": ["decision", "knowledge", "episode"],
|
||||
"activation": {"max_items": 2, "max_tokens": 16, "semantic_index": "memory"}
|
||||
},
|
||||
"graph": {
|
||||
"schema_version": "markitect.memory.graph.v1",
|
||||
"id": "eval-budget-graph",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "budget.anchor",
|
||||
"kind": "decision",
|
||||
"text": "Restart anchor with source.",
|
||||
"phase": "stabilized",
|
||||
"source_spans": [{"path": "restart.md", "line_start": 3}],
|
||||
"metadata": {"graph_id": "eval-budget-graph"}
|
||||
},
|
||||
{
|
||||
"id": "budget.semantic",
|
||||
"kind": "knowledge",
|
||||
"text": "Semantic index hint for restart package selection.",
|
||||
"phase": "stabilized",
|
||||
"source_spans": [{"path": "retrieval.md", "line_start": 7}],
|
||||
"metadata": {"graph_id": "eval-budget-graph"}
|
||||
},
|
||||
{
|
||||
"id": "budget.long",
|
||||
"kind": "episode",
|
||||
"text": "This verbose episode is intentionally long enough to lose against the strict activation token budget pressure.",
|
||||
"phase": "fluid",
|
||||
"metadata": {"graph_id": "eval-budget-graph"}
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge.budget",
|
||||
"kind": "supports",
|
||||
"source": "budget.anchor",
|
||||
"target": "budget.semantic"
|
||||
}
|
||||
],
|
||||
"events": [
|
||||
{
|
||||
"id": "budget.path-event",
|
||||
"kind": "activated",
|
||||
"timestamp": "2026-05-18T00:00:00+00:00",
|
||||
"activation_refs": ["activation.budget"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"path": {
|
||||
"id": "path.budget",
|
||||
"event_ids": ["budget.path-event"]
|
||||
},
|
||||
"expect": {
|
||||
"selected_node_ids": ["budget.anchor", "budget.semantic"],
|
||||
"omitted_node_ids": ["budget.long"],
|
||||
"semantic_top_id": "budget.semantic",
|
||||
"event_ids": ["budget.path-event"]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user