Files
markitect-main/markitect/spaces/transclusion/cache_invalidation.py
tegwick 7da77396a9 feat(spaces): implement Phase 3 Persistent Transclusion Context
Implements persistent transclusion context for Information Spaces:

- ScopedVariables: Variable scope layers (request > document > space)
- SpaceTransclusionContext: Extends TransclusionContext with DB persistence
- CrossSpaceResolver: Resolve references across space boundaries
- ReferenceGraph: Track document dependencies for cache invalidation
- PersistentReferenceGraph: Repository-backed reference tracking
- RenderCache: Cache rendered output with invalidation support
- CacheInvalidator: Event-driven cache invalidation using reference graph

Key features:
- Variable precedence: request overrides document overrides space
- Reference tracking during transclusion processing
- Transitive dependent calculation for cache invalidation
- Event bus integration for automatic invalidation on content changes

47 unit tests covering all components.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 08:36:50 +01:00

398 lines
11 KiB
Python

"""
Cache invalidation for Information Spaces.
This module provides event-driven cache invalidation that uses
the reference graph to determine which documents need to be
re-rendered when content changes.
"""
from typing import Set, List, Dict, Any, Optional, Callable
from dataclasses import dataclass, field
import logging
from ..events import (
EventBus,
SpaceEvent,
SpaceEventType,
cache_invalidated_event,
)
from .reference_graph import ReferenceGraph, PersistentReferenceGraph
logger = logging.getLogger(__name__)
@dataclass
class CacheEntry:
    """
    One rendered document as held by the render cache.

    Attributes:
        document_id: ID of the document that was rendered
        space_id: ID of the space the document was rendered in
        content_hash: Hash of the source content at render time
        rendered_content: The rendering result (any type)
        dependencies: IDs of the documents this rendering depends on
    """

    # Identity of the cached rendering.
    document_id: str
    space_id: str

    # Compared against the current source hash to decide whether the
    # rendering is still usable.
    content_hash: str
    rendered_content: Any

    # A fresh set per instance, so entries never share a default container.
    dependencies: Set[str] = field(default_factory=set)
class RenderCache:
    """
    Cache for rendered document content.

    Entries are keyed by document ID. A secondary index maps each space ID
    to the IDs of the documents cached for it, so a whole space can be
    dropped without scanning every entry. The two structures are kept in
    sync by every mutating method.
    """

    def __init__(self):
        """Initialize an empty cache."""
        # document_id -> cached entry
        self._cache: Dict[str, CacheEntry] = {}
        # space_id -> IDs of documents currently cached for that space
        self._by_space: Dict[str, Set[str]] = {}

    def _unindex(self, document_id: str, space_id: str) -> None:
        """Remove a document from a space's index, dropping the set if empty."""
        members = self._by_space.get(space_id)
        if members is None:
            return
        members.discard(document_id)
        if not members:
            # Do not let empty per-space sets accumulate forever.
            del self._by_space[space_id]

    def get(self, document_id: str) -> Optional[CacheEntry]:
        """
        Get a cache entry.

        Args:
            document_id: The document ID

        Returns:
            CacheEntry if found, None otherwise
        """
        return self._cache.get(document_id)

    def put(
        self,
        document_id: str,
        space_id: str,
        content_hash: str,
        rendered_content: Any,
        dependencies: Optional[Set[str]] = None,
    ) -> CacheEntry:
        """
        Store a cache entry, replacing any existing entry for the document.

        Args:
            document_id: The document ID
            space_id: The space ID
            content_hash: Source content hash
            rendered_content: Rendered output
            dependencies: Document IDs this depends on

        Returns:
            The created CacheEntry
        """
        # If the document was previously cached under another space, remove
        # it from that space's index first. Otherwise invalidating the old
        # space would evict an entry that now belongs to the new one.
        previous = self._cache.get(document_id)
        if previous is not None and previous.space_id != space_id:
            self._unindex(document_id, previous.space_id)

        entry = CacheEntry(
            document_id=document_id,
            space_id=space_id,
            content_hash=content_hash,
            rendered_content=rendered_content,
            # Copy so later mutation of the caller's set cannot change the
            # cached entry.
            dependencies=set(dependencies) if dependencies else set(),
        )
        self._cache[document_id] = entry
        self._by_space.setdefault(space_id, set()).add(document_id)
        return entry

    def invalidate(self, document_id: str) -> bool:
        """
        Invalidate a single cache entry.

        Args:
            document_id: The document ID

        Returns:
            True if entry was invalidated, False if not found
        """
        entry = self._cache.pop(document_id, None)
        if entry is None:
            return False
        self._unindex(document_id, entry.space_id)
        return True

    def invalidate_many(self, document_ids: Set[str]) -> int:
        """
        Invalidate multiple cache entries.

        Args:
            document_ids: Set of document IDs to invalidate

        Returns:
            Number of entries invalidated (IDs that were not cached are
            skipped and not counted)
        """
        count = 0
        for doc_id in document_ids:
            if self.invalidate(doc_id):
                count += 1
        return count

    def invalidate_space(self, space_id: str) -> int:
        """
        Invalidate all entries for a space.

        Args:
            space_id: The space ID

        Returns:
            Number of entries invalidated
        """
        # Popping the index entry both yields the IDs and removes the space.
        doc_ids = self._by_space.pop(space_id, set())
        count = 0
        for doc_id in doc_ids:
            if doc_id in self._cache:
                del self._cache[doc_id]
                count += 1
        return count

    def is_valid(self, document_id: str, content_hash: str) -> bool:
        """
        Check if a cache entry is still valid.

        Args:
            document_id: The document ID
            content_hash: Current content hash

        Returns:
            True if cache entry exists and matches hash
        """
        entry = self._cache.get(document_id)
        return entry is not None and entry.content_hash == content_hash

    def get_cached_documents(self, space_id: str) -> Set[str]:
        """
        Get all cached document IDs for a space.

        Args:
            space_id: The space ID

        Returns:
            Set of cached document IDs (a copy; mutating it does not
            affect the cache)
        """
        return set(self._by_space.get(space_id, ()))

    def clear(self) -> int:
        """
        Clear all cache entries.

        Returns:
            Number of entries cleared
        """
        count = len(self._cache)
        self._cache.clear()
        self._by_space.clear()
        return count
class CacheInvalidator:
    """
    Event-driven cache invalidation coordinator.

    Listens to document change events and uses the reference graph
    to invalidate affected cache entries. Without an event bus the
    invalidator can still be driven manually through
    ``invalidate_for_document`` and ``invalidate_all``.
    """

    def __init__(
        self,
        cache: RenderCache,
        reference_graph: ReferenceGraph,
        event_bus: Optional[EventBus] = None,
        transitive: bool = True,
    ):
        """
        Initialize the cache invalidator.

        Args:
            cache: The render cache to invalidate
            reference_graph: Reference graph for dependency tracking
            event_bus: Event bus for subscribing to changes
            transitive: Whether to invalidate transitive dependents
        """
        self._cache = cache
        self._reference_graph = reference_graph
        self._event_bus = event_bus
        self._transitive = transitive
        # IDs returned by the bus on subscribe; needed to unsubscribe later.
        self._handler_ids: List[str] = []
        # Compare against None rather than relying on truthiness: a bus
        # object that defines __len__/__bool__ could evaluate as false and
        # would then be silently ignored.
        if event_bus is not None:
            self._subscribe_to_events()

    def _subscribe_to_events(self) -> None:
        """Subscribe to relevant events."""
        if self._event_bus is None:
            return
        subscriptions = (
            # Document content changes trigger invalidation
            (SpaceEventType.DOCUMENT_CONTENT_CHANGED, self._on_content_changed),
            # Document removal invalidates
            (SpaceEventType.DOCUMENT_REMOVED, self._on_document_removed),
            # Space deletion invalidates all
            (SpaceEventType.SPACE_DELETED, self._on_space_deleted),
        )
        for event_type, callback in subscriptions:
            handler_id = self._event_bus.subscribe(event_type, callback)
            self._handler_ids.append(handler_id)

    def unsubscribe(self) -> None:
        """Unsubscribe from all events."""
        if self._event_bus is None:
            return
        for handler_id in self._handler_ids:
            self._event_bus.unsubscribe_by_id(handler_id)
        self._handler_ids.clear()

    def _on_content_changed(self, event: SpaceEvent) -> None:
        """Handle document content change event."""
        document_id = event.payload.get("document_id")
        if not document_id:
            # Nothing to do for an event without a document ID.
            return
        invalidated = self.invalidate_for_document(document_id, event.space_id)
        logger.debug(
            "Content changed for %s, invalidated %s documents",
            document_id,
            len(invalidated),
        )

    def _on_document_removed(self, event: SpaceEvent) -> None:
        """Handle document removal event."""
        document_id = event.payload.get("document_id")
        if not document_id:
            return
        # Invalidate this document and dependents. This must happen before
        # the graph is pruned, because the dependents are looked up there.
        # NOTE(review): the emitted invalidation event carries the reason
        # "content_changed" for removals as well.
        invalidated = self.invalidate_for_document(document_id, event.space_id)
        # Remove from reference graph
        self._reference_graph.remove_document(document_id)
        logger.debug(
            "Document %s removed, invalidated %s documents",
            document_id,
            len(invalidated),
        )

    def _on_space_deleted(self, event: SpaceEvent) -> None:
        """Handle space deletion event."""
        space_id = event.space_id
        # Invalidate all cached documents in space
        count = self._cache.invalidate_space(space_id)
        # Clear reference graph
        self._reference_graph.clear_space(space_id)
        logger.debug("Space %s deleted, invalidated %s documents", space_id, count)

    def invalidate_for_document(
        self,
        document_id: str,
        space_id: str,
    ) -> Set[str]:
        """
        Invalidate cache for a document and its dependents.

        Args:
            document_id: The changed document
            space_id: The space ID

        Returns:
            Set of document IDs that were targeted for invalidation: the
            document itself plus its dependents, whether or not each one
            was actually present in the cache
        """
        to_invalidate = {document_id}
        # Get dependents (documents that include this one)
        if self._transitive:
            dependents = self._reference_graph.get_transitive_dependents(document_id)
        else:
            dependents = self._reference_graph.get_dependents(document_id)
        to_invalidate.update(dependents)
        # Invalidate cache entries
        self._cache.invalidate_many(to_invalidate)
        # Emit cache invalidation event if we have event bus. The set always
        # contains at least document_id, so there is always something to
        # report.
        if self._event_bus is not None:
            event = cache_invalidated_event(
                space_id=space_id,
                document_ids=list(to_invalidate),
                reason="content_changed",
            )
            self._event_bus.emit(event)
        return to_invalidate

    def invalidate_all(self, space_id: str) -> int:
        """
        Invalidate all cache entries for a space.

        Args:
            space_id: The space ID

        Returns:
            Number of entries invalidated
        """
        # Snapshot the IDs first; invalidate_space removes the space index.
        cached_docs = self._cache.get_cached_documents(space_id)
        count = self._cache.invalidate_space(space_id)
        # Only announce an invalidation when something was actually evicted.
        if self._event_bus is not None and count > 0:
            event = cache_invalidated_event(
                space_id=space_id,
                document_ids=list(cached_docs),
                reason="full_invalidation",
            )
            self._event_bus.emit(event)
        return count
def create_invalidation_handler(
    cache: RenderCache,
    reference_graph: ReferenceGraph,
) -> Callable[[SpaceEvent], None]:
    """
    Build a standalone event callback that performs cache invalidation.

    Intended for callers that want invalidation on events without
    setting up a full CacheInvalidator. The returned callback always
    invalidates transitively and does not emit any event itself.

    Args:
        cache: The render cache
        reference_graph: The reference graph

    Returns:
        Event handler function
    """

    def handler(event: SpaceEvent) -> None:
        changed_id = event.payload.get("document_id")
        if changed_id:
            # The changed document plus everything that transitively
            # depends on it.
            affected = {
                changed_id,
                *reference_graph.get_transitive_dependents(changed_id),
            }
            cache.invalidate_many(affected)

    return handler