# generated from coulomb/repo-seed
# Listing metadata: 52 lines, 2.1 KiB, YAML
# Header fields identifying this evaluation record.
schema_version: infospace-bench.memory-context-evaluation.v1
# Kept quoted so the timestamp stays a string instead of being
# implicitly converted to a datetime by the parser.
evaluated_at: "2026-05-15T00:20:00Z"
workplan: IB-WP-0017
profile_id: infospace-agentic-memory-pilot
graph_id: infospace-agentic-memory-graph
# NOTE(review): presumably a path relative to this file - confirm.
selection: restart-context-selection.yaml
# Scored review questions. Every entry carries an id, the prompt that was
# asked, a score out of max_score, and a result label (`pass` or `watch`
# are the only labels used in this file).
questions:
  - id: restart-quality
    prompt: Can a later agent resume Wealth/VSM review with the selected package?
    score: 4.2
    max_score: 5.0
    result: pass
  - id: provenance-review
    prompt: Can a reviewer see why each memory item exists?
    score: 5.0
    max_score: 5.0
    result: pass
  - id: budget-realism
    prompt: Does the selection fit a compact restart budget?
    score: 4.0
    max_score: 5.0
    result: pass
  # NOTE(review): the only score below 4.0 and the only `watch` result;
  # the pass/watch threshold is not stated in this file - confirm.
  - id: noise-control
    prompt: Does the package omit low-value trace detail?
    score: 3.8
    max_score: 5.0
    result: watch
# Aggregate numbers reported for this evaluation.
metrics:
  # Same value as the score recorded for the restart-quality question.
  restart_quality_score: 4.2
  provenance_coverage_ratio: 1.0
  # NOTE(review): 7 selected vs 8 expected - confirm the shortfall is
  # intended and not a stale count.
  selected_node_count: 7
  expected_item_count: 8
  context_package_budget_max_tokens: 1200
  # NOTE(review): nesting was lost in the captured listing; this key is
  # placed under `metrics` because it directly follows the metric entries.
  # Confirm against the consumer's schema that it is not a top-level key.
  live_llm_required: false
# Qualitative findings; each has a stable id and a one-sentence summary.
findings:
  - id: finding.restart-risk
    summary: Decision-only memory is not enough; restart packages need the active source/entity neighborhood.
  - id: finding.neighborhood-improves-review
    summary: The Division of Labour source and entity references make the boundary decision actionable for review.
  - id: finding.profile-gap
    summary: Profile retention intent is useful, but acceptance thresholds remain application-level metrics in infospace-bench.
# Recommended follow-ups, grouped under one key per target.
# NOTE(review): the three keys look like repository/component names -
# confirm.
recommended_contract_changes:
  markitect-tool:
    - Add an option for timestamp-stable context package fixture output to simplify cross-repo golden files.
    - Document when selected events should become package items versus metadata.
  kontextual-engine:
    - Import Markitect graph/profile envelopes and persist runtime audit events separately from contract events.
    - Keep durable write plans review-gated and export Markitect-compatible package input envelopes.
  infospace-bench:
    - Keep memory quality metrics and pilot corpora here.
    - Do not store user memory, credentials, or runtime graph state in an infospace.