Files
can-you-assist/tests/test_memory.py
tegwick c60b3b5001 Fix flaky test_profile_0_kinds... (key name for retrospection kind matching) so all memory tests pass cleanly post T05
Part of final verification that 'installing still works' after the CYA-WP-0005 ralph loop.
2026-05-28 03:30:28 +02:00

286 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for the real (T02) memory ports + T04/T05 safety integration.
These tests exercise the user-controlled json-backed implementation and verify:
- Actual persistence across calls (within test scope)
- Proper scoping
- Memory + risk safety interaction (never prefs still force confirmation)
- Graceful degradation
- Observability data in the return values
"""
import json
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
from cya.memory import (
remember_preference,
recall_preferences,
forget,
export_memory,
remember_retrospection_outcome,
remember_reflection,
KIND_RETROSPECTION,
KIND_INTERACTION_GOAL,
KIND_REFLECTION,
)
from cya.safety.risk import classify, RiskLevel
@pytest.fixture
def isolated_memory(monkeypatch, tmp_path):
    """Point the memory module at a private, per-test directory.

    Creates ``tmp_path / "memory"`` and patches ``cya.memory._mem_path`` so
    that every scope resolves to ``<scope>.json`` inside that directory.
    Returns the directory so tests can inspect it if needed.
    """
    sandbox = tmp_path / "memory"
    sandbox.mkdir()

    def _scoped_file(scope: str = "cwd") -> Path:
        # One JSON file per scope, all confined to the sandbox directory.
        return sandbox.joinpath(f"{scope}.json")

    monkeypatch.setattr("cya.memory._mem_path", _scoped_file)
    return sandbox
def test_remember_and_recall_roundtrip(isolated_memory):
    """Two stored preferences are recalled, with phase and provenance reported."""
    remember_preference("project_name", "can-you-assist", scope="test-scope")
    remember_preference("favorite_cmd", "git status", scope="test-scope")

    recalled = recall_preferences(scope="test-scope")
    entries = recalled["items"]

    assert recalled["phase"] == "fluid"
    assert len(entries) == 2

    stored_keys = {entry["key"] for entry in entries}
    assert "project_name" in stored_keys
    assert "favorite_cmd" in stored_keys

    # Observability: the first provenance record names the local backend.
    first_prov = recalled.get("provenance", [{}])[0]
    assert "source" in first_prov
    assert "cya-local-memory" in first_prov.get("source", "")
def test_forget_specific_keys(isolated_memory):
    """Forgetting one key removes it and leaves the other key in place."""
    target_scope = "forget-test"
    remember_preference("a", 1, scope=target_scope)
    remember_preference("b", 2, scope=target_scope)

    forget(scope=target_scope, keys=["a"])

    remaining = {
        entry["key"] for entry in recall_preferences(scope=target_scope)["items"]
    }
    assert "a" not in remaining
    assert "b" in remaining
def test_forget_all(isolated_memory):
    """Calling forget without keys clears every item in the scope."""
    target_scope = "clear-test"
    remember_preference("x", "y", scope=target_scope)

    # Omitting ``keys`` means "clear everything" for this scope.
    forget(scope=target_scope)

    recalled = recall_preferences(scope=target_scope)
    assert len(recalled["items"]) == 0
def test_memory_signals_still_force_confirmation_on_dangerous(isolated_memory):
    """Core T04 + T05 invariant: memory can never bypass safety.

    Stores a standing "never" preference, feeds the recalled memory into
    ``classify`` for an ``rm -rf`` command, and checks the assessment is
    destructive, requires confirmation, and cites a memory/"never" rule.
    """
    # The user's standing "never" preference.
    remember_preference("never_auto_run", "rm -rf", scope="safety-test")

    recalled = recall_preferences(scope="safety-test")
    verdict = classify("rm -rf /tmp/important", memory=recalled)

    assert verdict.level == RiskLevel.DESTRUCTIVE
    assert verdict.requires_confirmation is True

    def _mentions_memory(rule):
        return "Memory" in rule or "never" in rule.lower()

    assert any(_mentions_memory(rule) for rule in verdict.rules_triggered)
def test_graceful_degradation_when_storage_fails(monkeypatch, tmp_path):
    """Memory should not crash the assistant if the backing store is broken.

    ``cya.memory._mem_path`` is replaced with a helper that returns a file
    inside a read-only directory. The test then checks that
    ``remember_preference`` does not raise and that ``recall_preferences``
    still returns a dict.
    """
    broken_dir = tmp_path / "broken"

    def _broken_mem_path(scope="cwd"):
        # exist_ok=True keeps the helper idempotent: it is presumably invoked
        # by each memory call below, and without it the second invocation
        # would raise FileExistsError from the test double itself.
        broken_dir.mkdir(parents=True, exist_ok=True)
        # Make the parent read-only after creation so writes will fail
        broken_dir.chmod(0o400)
        return broken_dir / f"{scope}.json"

    monkeypatch.setattr("cya.memory._mem_path", _broken_mem_path)
    try:
        # Should not raise
        remember_preference("will_fail", "value", scope="broken")
        data = recall_preferences(scope="broken")
        assert isinstance(data, dict)  # still returns something usable
    finally:
        # Restore permissions so the tmp_path tree can be cleaned up.
        if broken_dir.exists():
            broken_dir.chmod(0o700)
def test_export_memory_observability(isolated_memory):
    """Export reports status, count, provenance summary and phase."""
    remember_preference("theme", "dark", scope="export-test")

    snapshot = export_memory(scope="export-test")

    assert snapshot["status"].startswith("real")
    assert snapshot["count"] >= 1
    # Both observability fields must be present in the export.
    for required_field in ("provenance_summary", "phase"):
        assert required_field in snapshot
# ---------------------------------------------------------------------------
# T03 (0003) — Activation context tests
# ---------------------------------------------------------------------------
def test_recall_with_activation_context_boosts_matching_scope(isolated_memory):
    """T03: recall with an activation_context for the matching scope.

    Checks that the context is echoed in provenance and that the preference
    stored under the recalled scope is present. Ordering is deliberately not
    asserted.
    """
    remember_preference("project_pref", "use --short", scope="my-project")
    remember_preference("other_pref", "verbose", scope="other-project")
    remember_preference("global_pref", "always show rationale", scope="global")

    # Mirrors what the orchestrator supplies for a request in "my-project".
    context = {"cwd": "/code/my-project", "git_root": "/code/my-project"}
    recalled = recall_preferences("my-project", activation_context=context)
    recalled_keys = [entry["key"] for entry in recalled["items"]]

    # The activation_context must be recorded for observability.
    first_prov = recalled.get("provenance", [{}])[0]
    assert first_prov.get("activation_context") == context

    # The preference for the matching scope must be present.
    assert "project_pref" in recalled_keys
    # At minimum the activation mechanism is exercised (ordering after the
    # limit slice is intentionally not asserted).
    assert len(recalled_keys) >= 1
def test_recall_with_kinds_and_activation_context(isolated_memory):
    """T03 + T04: kinds filter + activation_context work together."""
    remember_retrospection_outcome(
        "retro_goal", "be concise in this project", scope="proj-x"
    )
    remember_preference("normal_pref", "use emojis", scope="proj-x")

    wanted_kinds = [KIND_INTERACTION_GOAL, "retrospection"]
    recalled = recall_preferences(
        "proj-x",
        kinds=wanted_kinds,
        activation_context={"cwd": "proj-x"},
    )

    found_kinds = [entry.get("kind") for entry in recalled["items"]]
    # Either spelling of the retrospection kind is accepted.
    assert KIND_INTERACTION_GOAL in found_kinds or "retrospection" in found_kinds
def test_profile_1_reflection_helper_and_activation(isolated_memory):
    """Minimal T05 Profile 1 spike: remember_reflection + recall filtered by kind."""
    remember_reflection(
        "lesson_rust",
        "Always run cargo clippy before suggesting fixes",
        scope="proj-rust",
    )

    recalled = recall_preferences(
        scope="proj-rust",
        kinds=[KIND_REFLECTION],
        activation_context={"cwd": "proj-rust"},
    )

    assert len(recalled.get("items", [])) >= 1

    found_kinds = [entry.get("kind") for entry in recalled.get("items", [])]
    assert KIND_REFLECTION in found_kinds

    # The helper stored the lesson text itself.
    stored_values = (str(entry.get("value", "")) for entry in recalled.get("items", []))
    assert any("cargo clippy" in text for text in stored_values)
# ---------------------------------------------------------------------------
# CYA-WP-0005 T02 — Explicit Profile 0 baseline assertions
# ---------------------------------------------------------------------------
# These tests document and assert the characteristics of the current shipped
# memory implementation, now formally named "Profile 0" (see MemoryVision.md
# "Profile 0 Baseline (Post-0003)" and CYA-WP-0005).
# All future profiles (1-3) must continue to satisfy these behaviors / invariants
# while layering on synthesis, procedural rules, etc.
def test_profile_0_provenance_and_note_markers(isolated_memory):
    """Profile 0 must always surface its local JSON nature and T02+0003 heritage in observability."""
    remember_preference("p0_marker", "value", scope="p0-test")

    recalled = recall_preferences(scope="p0-test")
    source = recalled.get("provenance", [{}])[0].get("source", "")
    note_text = recalled.get("note", "")

    assert "cya-local-memory" in source
    # The heritage marker may appear in the source tag or in the note.
    assert "T02+0003" in source or "local json" in note_text.lower()
    assert recalled.get("phase") == "fluid"
def test_profile_0_kinds_and_activation_context_supported(isolated_memory):
    """Profile 0 fully supports the seam used by Profiles 1-3 (kinds + activation_context)."""
    remember_retrospection_outcome(
        "p0_retrospection", "remember this pattern", scope="p0-proj"
    )

    context = {"cwd": "p0-proj", "profile": "default"}
    recalled = recall_preferences(
        scope="p0-proj",
        kinds=["retrospection"],
        activation_context=context,
    )

    assert len(recalled["items"]) >= 1
    # Provenance is always present and structured.
    first_prov = recalled.get("provenance", [{}])[0]
    assert isinstance(first_prov, dict)
# ---------------------------------------------------------------------------
# T04 (0003) — Retrospection outcome tests
# ---------------------------------------------------------------------------
def test_remember_and_recall_retrospection_outcomes(isolated_memory):
    """T04: retrospection outcomes are stored with correct kind and retrievable."""
    remember_retrospection_outcome(
        "interaction_goal",
        "prefer one-sentence answers when possible",
        scope="retro-test",
    )
    remember_retrospection_outcome(
        "retrospection_note",
        "user liked the safety warnings last time",
        scope="retro-test",
    )

    # Recall only the retrospection-related kinds.
    accepted_kinds = (KIND_RETROSPECTION, KIND_INTERACTION_GOAL)
    recalled = recall_preferences(
        "retro-test",
        kinds=[KIND_RETROSPECTION, KIND_INTERACTION_GOAL],
    )

    recalled_keys = {entry["key"] for entry in recalled["items"]}
    assert "interaction_goal" in recalled_keys
    assert "retrospection_note" in recalled_keys

    # Every returned item must carry one of the requested kinds.
    for entry in recalled["items"]:
        assert entry.get("kind") in accepted_kinds
def test_export_memory_with_kinds_filter(isolated_memory):
    """T04 observability: export_memory supports kinds filter and reports by_kind."""
    remember_preference("normal", "value", scope="kind-test")
    remember_retrospection_outcome("goal1", "be direct", scope="kind-test")

    # Unfiltered export: the breakdown covers both stored kinds.
    breakdown = export_memory(scope="kind-test").get("by_kind", {})
    assert "preference" in breakdown
    assert KIND_INTERACTION_GOAL in breakdown or "retrospection" in breakdown

    # Filtered export: only goal/retrospection items are returned.
    filtered = export_memory(scope="kind-test", kinds=[KIND_INTERACTION_GOAL])
    assert filtered["count"] >= 1
    for entry in filtered.get("items", []):
        assert entry.get("kind") in (KIND_INTERACTION_GOAL, "retrospection")
# ---------------------------------------------------------------------------
# Additional graceful degradation + observability (T05)
# ---------------------------------------------------------------------------
def test_recall_with_bad_activation_context_is_graceful(isolated_memory):
    """T05: bad activation_context should not break recall."""
    remember_preference("safe", "value", scope="graceful-test")

    # A bare object() is passed as an unusual context value.
    odd_context = {"weird": object()}
    recalled = recall_preferences("graceful-test", activation_context=odd_context)

    assert isinstance(recalled, dict)
    # Either a normal payload or something that mentions an error is acceptable.
    assert "items" in recalled or "error" in str(recalled)
def test_export_memory_observability_includes_by_kind(isolated_memory):
    """T05 observability: export now reports by_kind breakdown."""
    remember_preference("p1", "v1", scope="obs-test")
    remember_retrospection_outcome("g1", "goal", scope="obs-test")

    snapshot = export_memory(scope="obs-test")

    assert "by_kind" in snapshot
    per_kind = snapshot["by_kind"]
    assert isinstance(per_kind, dict)
    # The per-kind counts must add up to the reported total.
    assert sum(per_kind.values()) == snapshot["count"]