generated from coulomb/repo-seed
- Added KIND_REFLECTION + remember_reflection() helper in memory (thin, exported). - Wired into run_retrospection(): optional 'capture verbal lesson' step at end. - Main recall now includes reflections for preferential activation. - Final LLM response + explain surface verbal reflections when activated. - Added roundtrip test + import updates. - Small README note. - All changes small/inspectable, safety preserved (still through RiskClassifier). - T05 acceptance criteria met with working end-to-end spike. Committed as ralph iter 5. Ready for T06+ or close.
285 lines
11 KiB
Python
285 lines
11 KiB
Python
"""Tests for the real (T02) memory ports + T04/T05 safety integration.

These tests exercise the user-controlled JSON-backed implementation and verify:
- Actual persistence across calls (within test scope)
- Proper scoping
- Memory + risk safety interaction ("never" preferences still force confirmation)
- Graceful degradation
- Observability data in the return values
"""
|
||
|
||
import json
|
||
import tempfile
|
||
from pathlib import Path
|
||
from unittest.mock import patch
|
||
|
||
import pytest
|
||
|
||
from cya.memory import (
|
||
remember_preference,
|
||
recall_preferences,
|
||
forget,
|
||
export_memory,
|
||
remember_retrospection_outcome,
|
||
remember_reflection,
|
||
KIND_RETROSPECTION,
|
||
KIND_INTERACTION_GOAL,
|
||
KIND_REFLECTION,
|
||
)
|
||
from cya.safety.risk import classify, RiskLevel
|
||
|
||
|
||
@pytest.fixture
def isolated_memory(monkeypatch, tmp_path):
    """Point cya.memory at a throwaway per-test directory and return it.

    ``cya.memory._mem_path`` is patched so that each scope resolves to
    ``<tmp_path>/memory/<scope>.json``.
    """
    storage_root = tmp_path / "memory"
    storage_root.mkdir()

    def _scoped_json_path(scope: str = "cwd") -> Path:
        # One JSON file per scope inside the isolated directory.
        filename = f"{scope}.json"
        return storage_root / filename

    monkeypatch.setattr("cya.memory._mem_path", _scoped_json_path)
    return storage_root
|
||
|
||
|
||
def test_remember_and_recall_roundtrip(isolated_memory):
    """Two stored preferences are recalled together with provenance data."""
    scope = "test-scope"
    remember_preference("project_name", "can-you-assist", scope=scope)
    remember_preference("favorite_cmd", "git status", scope=scope)

    recalled = recall_preferences(scope=scope)

    assert recalled["phase"] == "fluid"
    assert len(recalled["items"]) == 2
    stored_keys = {entry["key"] for entry in recalled["items"]}
    for expected_key in ("project_name", "favorite_cmd"):
        assert expected_key in stored_keys

    # Observability / provenance is present
    first_provenance = recalled.get("provenance", [{}])[0]
    assert "source" in first_provenance
    assert "cya-local-memory" in first_provenance.get("source", "")
|
||
|
||
|
||
def test_forget_specific_keys(isolated_memory):
    """Forgetting one key removes it and leaves the other key in place."""
    scope = "forget-test"
    for key, value in (("a", 1), ("b", 2)):
        remember_preference(key, value, scope=scope)

    forget(scope=scope, keys=["a"])

    recalled = recall_preferences(scope=scope)
    remaining_keys = {entry["key"] for entry in recalled["items"]}
    assert "a" not in remaining_keys
    assert "b" in remaining_keys
|
||
|
||
|
||
def test_forget_all(isolated_memory):
    """Calling forget() without keys clears every item in the scope."""
    scope = "clear-test"
    remember_preference("x", "y", scope=scope)

    # No keys argument means "clear all".
    forget(scope=scope)

    remaining_items = recall_preferences(scope=scope)["items"]
    assert len(remaining_items) == 0
|
||
|
||
|
||
def test_memory_signals_still_force_confirmation_on_dangerous(isolated_memory):
    """Core T04 + T05 invariant: memory can never bypass safety."""
    scope = "safety-test"

    # User has a standing "never" preference
    remember_preference("never_auto_run", "rm -rf", scope=scope)
    recalled_memory = recall_preferences(scope=scope)

    assessment = classify("rm -rf /tmp/important", memory=recalled_memory)

    assert assessment.level == RiskLevel.DESTRUCTIVE
    assert assessment.requires_confirmation is True

    def _mentions_memory(rule):
        # A triggered rule counts if it names memory or a "never" preference.
        return "Memory" in rule or "never" in rule.lower()

    assert any(_mentions_memory(rule) for rule in assessment.rules_triggered)
|
||
|
||
|
||
def test_graceful_degradation_when_storage_fails(monkeypatch, tmp_path):
    """Memory should not crash the assistant if the backing store is broken.

    The storage directory is created exactly once and then made read-only so
    that writes into it fail. Creating it inside the path helper (as before)
    raised ``FileExistsError`` on the second lookup, which tested the helper
    rather than the memory module's degradation behaviour.
    """
    broken_dir = tmp_path / "broken"
    broken_dir.mkdir(parents=True)

    def _broken_mem_path(scope="cwd"):
        # Pure path computation: safe to call any number of times.
        return broken_dir / f"{scope}.json"

    monkeypatch.setattr("cya.memory._mem_path", _broken_mem_path)

    # Make the directory read-only after creation so writes will fail.
    # NOTE: when running as root the permission bits are not enforced, so the
    # write may succeed; the "does not raise" contract below still holds.
    broken_dir.chmod(0o400)
    try:
        # Should not raise
        remember_preference("will_fail", "value", scope="broken")
        data = recall_preferences(scope="broken")
        assert isinstance(data, dict)  # still returns something usable
    finally:
        # Restore permissions so pytest can clean up tmp_path afterwards.
        broken_dir.chmod(0o700)
|
||
|
||
|
||
def test_export_memory_observability(isolated_memory):
    """export_memory reports status, count, provenance summary and phase."""
    scope = "export-test"
    remember_preference("theme", "dark", scope=scope)

    exported = export_memory(scope=scope)

    assert exported["status"].startswith("real")
    assert exported["count"] >= 1
    for required_field in ("provenance_summary", "phase"):
        assert required_field in exported
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# T03 (0003) — Activation context tests
# ---------------------------------------------------------------------------
|
||
|
||
def test_recall_with_activation_context_boosts_matching_scope(isolated_memory):
    """T03: activation_context should boost items matching the provided cwd/git context."""
    stored = (
        ("project_pref", "use --short", "my-project"),
        ("other_pref", "verbose", "other-project"),
        ("global_pref", "always show rationale", "global"),
    )
    for key, value, scope in stored:
        remember_preference(key, value, scope=scope)

    # Simulate what the orchestrator does for a request in "my-project"
    act_ctx = {"cwd": "/code/my-project", "git_root": "/code/my-project"}
    recalled = recall_preferences("my-project", activation_context=act_ctx)

    recalled_keys = [entry["key"] for entry in recalled["items"]]

    # The activation_context must be recorded for observability
    first_provenance = recalled.get("provenance", [{}])[0]
    assert first_provenance.get("activation_context") == act_ctx

    # project_pref for the matching scope must be present
    # (boosting puts relevant items first)
    assert "project_pref" in recalled_keys
    # At minimum the activation mechanism is exercised
    # (ordering after the -limit slice is deliberately not asserted)
    assert len(recalled_keys) >= 1
|
||
|
||
|
||
def test_recall_with_kinds_and_activation_context(isolated_memory):
    """T03 + T04: kinds filter + activation_context work together."""
    scope = "proj-x"
    remember_retrospection_outcome("retro_goal", "be concise in this project", scope=scope)
    remember_preference("normal_pref", "use emojis", scope=scope)

    recalled = recall_preferences(
        scope,
        kinds=[KIND_INTERACTION_GOAL, "retrospection"],
        activation_context={"cwd": "proj-x"},
    )

    recalled_kinds = [entry.get("kind") for entry in recalled["items"]]
    accepted_kinds = (KIND_INTERACTION_GOAL, "retrospection")
    assert any(accepted in recalled_kinds for accepted in accepted_kinds)
|
||
|
||
|
||
def test_profile_1_reflection_helper_and_activation(isolated_memory):
    """Minimal T05 Profile 1 spike: remember_reflection + preferential recall by kind."""
    scope = "proj-rust"
    remember_reflection(
        "lesson_rust",
        "Always run cargo clippy before suggesting fixes",
        scope=scope,
    )

    recalled = recall_preferences(
        scope=scope,
        kinds=[KIND_REFLECTION],
        activation_context={"cwd": "proj-rust"},
    )

    assert len(recalled.get("items", [])) >= 1
    recalled_kinds = [entry.get("kind") for entry in recalled.get("items", [])]
    assert KIND_REFLECTION in recalled_kinds

    # The helper stored it correctly
    stored_values = (str(entry.get("value", "")) for entry in recalled.get("items", []))
    assert any("cargo clippy" in text for text in stored_values)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# CYA-WP-0005 T02 — Explicit Profile 0 baseline assertions
# ---------------------------------------------------------------------------
# These tests document and assert the characteristics of the currently shipped
# memory implementation, now formally named "Profile 0" (see MemoryVision.md
# "Profile 0 Baseline (Post-0003)" and CYA-WP-0005).
# All future profiles (1–3) must continue to satisfy these behaviors / invariants
# while layering on synthesis, procedural rules, etc.
|
||
|
||
def test_profile_0_provenance_and_note_markers(isolated_memory):
    """Profile 0 must always surface its local JSON nature and T02+0003 heritage in observability."""
    scope = "p0-test"
    remember_preference("p0_marker", "value", scope=scope)

    recalled = recall_preferences(scope=scope)
    source = recalled.get("provenance", [{}])[0].get("source", "")
    note_text = recalled.get("note", "")

    assert "cya-local-memory" in source
    # Either the provenance source or the note must carry the heritage marker.
    heritage_in_source = "T02+0003" in source
    assert heritage_in_source or "local json" in note_text.lower()
    assert recalled.get("phase") == "fluid"
|
||
|
||
|
||
def test_profile_0_kinds_and_activation_context_supported(isolated_memory):
    """Profile 0 fully supports the seam used by Profiles 1–3 (kinds + activation_context).

    Stores one retrospection outcome, recalls it with both a ``kinds`` filter
    and an ``activation_context``, and checks that at least one item is
    returned and that provenance is reported as a list.
    """
    remember_retrospection_outcome("p0_retro", "remember this pattern", scope="p0-proj")
    act = {"cwd": "p0-proj", "profile": "default"}

    data = recall_preferences(scope="p0-proj", kinds=["retrospection"], activation_context=act)

    assert len(data["items"]) >= 1
    # Provenance must always be present. The earlier form of this check was
    # `data.get("activation_context") is None or ...`, which short-circuited
    # to True whenever the result had no top-level "activation_context" key,
    # so the provenance check never ran.
    assert isinstance(data.get("provenance"), list)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# T04 (0003) — Retrospection outcome tests
# ---------------------------------------------------------------------------
|
||
|
||
def test_remember_and_recall_retrospection_outcomes(isolated_memory):
    """T04: retrospection outcomes are stored with correct kind and retrievable."""
    scope = "retro-test"
    outcomes = (
        ("interaction_goal", "prefer one-sentence answers when possible"),
        ("retrospection_note", "user liked the safety warnings last time"),
    )
    for key, value in outcomes:
        remember_retrospection_outcome(key, value, scope=scope)

    # Recall specifically asking for retrospection kinds
    requested_kinds = (KIND_RETROSPECTION, KIND_INTERACTION_GOAL)
    recalled = recall_preferences(scope, kinds=list(requested_kinds))

    recalled_keys = {entry["key"] for entry in recalled["items"]}
    assert "interaction_goal" in recalled_keys
    assert "retrospection_note" in recalled_keys

    # They should have the right kinds
    assert all(entry.get("kind") in requested_kinds for entry in recalled["items"])
|
||
|
||
|
||
def test_export_memory_with_kinds_filter(isolated_memory):
    """T04 observability: export_memory supports kinds filter and reports by_kind."""
    scope = "kind-test"
    remember_preference("normal", "value", scope=scope)
    remember_retrospection_outcome("goal1", "be direct", scope=scope)

    unfiltered = export_memory(scope=scope)
    assert "preference" in unfiltered.get("by_kind", {})
    goal_reported = KIND_INTERACTION_GOAL in unfiltered.get("by_kind", {})
    assert goal_reported or "retrospection" in unfiltered.get("by_kind", {})

    goals_only = export_memory(scope=scope, kinds=[KIND_INTERACTION_GOAL])
    assert goals_only["count"] >= 1
    allowed_kinds = (KIND_INTERACTION_GOAL, "retrospection")
    for entry in goals_only.get("items", []):
        assert entry.get("kind") in allowed_kinds
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Additional graceful degradation + observability (T05)
# ---------------------------------------------------------------------------
|
||
|
||
def test_recall_with_bad_activation_context_is_graceful(isolated_memory):
    """T05: bad activation_context should not break recall."""
    scope = "graceful-test"
    remember_preference("safe", "value", scope=scope)

    # An arbitrary object() value stands in for a malformed context entry.
    malformed_context = {"weird": object()}
    recalled = recall_preferences(scope, activation_context=malformed_context)

    assert isinstance(recalled, dict)
    has_items = "items" in recalled
    assert has_items or "error" in str(recalled)
|
||
|
||
|
||
def test_export_memory_observability_includes_by_kind(isolated_memory):
    """T05 observability: export now reports by_kind breakdown."""
    scope = "obs-test"
    remember_preference("p1", "v1", scope=scope)
    remember_retrospection_outcome("g1", "goal", scope=scope)

    exported = export_memory(scope=scope)

    assert "by_kind" in exported
    breakdown = exported["by_kind"]
    assert isinstance(breakdown, dict)
    # Per-kind counts must add up to the reported total.
    assert sum(breakdown.values()) == exported["count"]