""" Cache invalidation for Information Spaces. This module provides event-driven cache invalidation that uses the reference graph to determine which documents need to be re-rendered when content changes. """ from typing import Set, List, Dict, Any, Optional, Callable from dataclasses import dataclass, field import logging from ..events import ( EventBus, SpaceEvent, SpaceEventType, cache_invalidated_event, ) from .reference_graph import ReferenceGraph, PersistentReferenceGraph logger = logging.getLogger(__name__) @dataclass class CacheEntry: """ Represents a cached rendering result. Attributes: document_id: The document ID space_id: The space ID content_hash: Hash of source content when rendered rendered_content: The rendered output dependencies: Document IDs this rendering depends on """ document_id: str space_id: str content_hash: str rendered_content: Any dependencies: Set[str] = field(default_factory=set) class RenderCache: """ Cache for rendered document content. Stores rendered output keyed by document ID, with automatic invalidation based on content changes and dependencies. """ def __init__(self): """Initialize an empty cache.""" self._cache: Dict[str, CacheEntry] = {} self._by_space: Dict[str, Set[str]] = {} def get(self, document_id: str) -> Optional[CacheEntry]: """ Get a cache entry. Args: document_id: The document ID Returns: CacheEntry if found, None otherwise """ return self._cache.get(document_id) def put( self, document_id: str, space_id: str, content_hash: str, rendered_content: Any, dependencies: Optional[Set[str]] = None, ) -> CacheEntry: """ Store a cache entry. Args: document_id: The document ID space_id: The space ID content_hash: Source content hash rendered_content: Rendered output dependencies: Document IDs this depends on Returns: The created CacheEntry """ entry = CacheEntry( document_id=document_id, space_id=space_id, content_hash=content_hash, rendered_content=rendered_content, dependencies=dependencies or set(), ) self._cache[document_id] = entry if space_id not in self._by_space: self._by_space[space_id] = set() self._by_space[space_id].add(document_id) return entry def invalidate(self, document_id: str) -> bool: """ Invalidate a single cache entry. Args: document_id: The document ID Returns: True if entry was invalidated, False if not found """ if document_id not in self._cache: return False entry = self._cache.pop(document_id) if entry.space_id in self._by_space: self._by_space[entry.space_id].discard(document_id) return True def invalidate_many(self, document_ids: Set[str]) -> int: """ Invalidate multiple cache entries. Args: document_ids: Set of document IDs to invalidate Returns: Number of entries invalidated """ count = 0 for doc_id in document_ids: if self.invalidate(doc_id): count += 1 return count def invalidate_space(self, space_id: str) -> int: """ Invalidate all entries for a space. Args: space_id: The space ID Returns: Number of entries invalidated """ doc_ids = self._by_space.pop(space_id, set()) count = 0 for doc_id in doc_ids: if doc_id in self._cache: del self._cache[doc_id] count += 1 return count def is_valid(self, document_id: str, content_hash: str) -> bool: """ Check if a cache entry is still valid. Args: document_id: The document ID content_hash: Current content hash Returns: True if cache entry exists and matches hash """ entry = self._cache.get(document_id) return entry is not None and entry.content_hash == content_hash def get_cached_documents(self, space_id: str) -> Set[str]: """ Get all cached document IDs for a space. Args: space_id: The space ID Returns: Set of cached document IDs """ return self._by_space.get(space_id, set()).copy() def clear(self) -> int: """ Clear all cache entries. Returns: Number of entries cleared """ count = len(self._cache) self._cache.clear() self._by_space.clear() return count class CacheInvalidator: """ Event-driven cache invalidation coordinator. Listens to document change events and uses the reference graph to invalidate affected cache entries. """ def __init__( self, cache: RenderCache, reference_graph: ReferenceGraph, event_bus: Optional[EventBus] = None, transitive: bool = True, ): """ Initialize the cache invalidator. Args: cache: The render cache to invalidate reference_graph: Reference graph for dependency tracking event_bus: Event bus for subscribing to changes transitive: Whether to invalidate transitive dependents """ self._cache = cache self._reference_graph = reference_graph self._event_bus = event_bus self._transitive = transitive self._handler_ids: List[str] = [] if event_bus: self._subscribe_to_events() def _subscribe_to_events(self) -> None: """Subscribe to relevant events.""" if not self._event_bus: return # Document content changes trigger invalidation handler_id = self._event_bus.subscribe( SpaceEventType.DOCUMENT_CONTENT_CHANGED, self._on_content_changed, ) self._handler_ids.append(handler_id) # Document removal invalidates handler_id = self._event_bus.subscribe( SpaceEventType.DOCUMENT_REMOVED, self._on_document_removed, ) self._handler_ids.append(handler_id) # Space deletion invalidates all handler_id = self._event_bus.subscribe( SpaceEventType.SPACE_DELETED, self._on_space_deleted, ) self._handler_ids.append(handler_id) def unsubscribe(self) -> None: """Unsubscribe from all events.""" if not self._event_bus: return for handler_id in self._handler_ids: self._event_bus.unsubscribe_by_id(handler_id) self._handler_ids.clear() def _on_content_changed(self, event: SpaceEvent) -> None: """Handle document content change event.""" document_id = event.payload.get("document_id") if not document_id: return invalidated = self.invalidate_for_document(document_id, event.space_id) logger.debug( f"Content changed for {document_id}, invalidated {len(invalidated)} documents" ) def _on_document_removed(self, event: SpaceEvent) -> None: """Handle document removal event.""" document_id = event.payload.get("document_id") if not document_id: return # Invalidate this document and dependents invalidated = self.invalidate_for_document(document_id, event.space_id) # Remove from reference graph self._reference_graph.remove_document(document_id) logger.debug( f"Document {document_id} removed, invalidated {len(invalidated)} documents" ) def _on_space_deleted(self, event: SpaceEvent) -> None: """Handle space deletion event.""" space_id = event.space_id # Invalidate all cached documents in space count = self._cache.invalidate_space(space_id) # Clear reference graph self._reference_graph.clear_space(space_id) logger.debug(f"Space {space_id} deleted, invalidated {count} documents") def invalidate_for_document( self, document_id: str, space_id: str, ) -> Set[str]: """ Invalidate cache for a document and its dependents. Args: document_id: The changed document space_id: The space ID Returns: Set of invalidated document IDs """ to_invalidate = {document_id} # Get dependents (documents that include this one) if self._transitive: dependents = self._reference_graph.get_transitive_dependents(document_id) else: dependents = self._reference_graph.get_dependents(document_id) to_invalidate.update(dependents) # Invalidate cache entries self._cache.invalidate_many(to_invalidate) # Emit cache invalidation event if we have event bus if self._event_bus and to_invalidate: event = cache_invalidated_event( space_id=space_id, document_ids=list(to_invalidate), reason="content_changed", ) self._event_bus.emit(event) return to_invalidate def invalidate_all(self, space_id: str) -> int: """ Invalidate all cache entries for a space. Args: space_id: The space ID Returns: Number of entries invalidated """ cached_docs = self._cache.get_cached_documents(space_id) count = self._cache.invalidate_space(space_id) if self._event_bus and count > 0: event = cache_invalidated_event( space_id=space_id, document_ids=list(cached_docs), reason="full_invalidation", ) self._event_bus.emit(event) return count def create_invalidation_handler( cache: RenderCache, reference_graph: ReferenceGraph, ) -> Callable[[SpaceEvent], None]: """ Create a standalone event handler for cache invalidation. Useful when you want to add invalidation without the full CacheInvalidator. Args: cache: The render cache reference_graph: The reference graph Returns: Event handler function """ def handler(event: SpaceEvent) -> None: document_id = event.payload.get("document_id") if not document_id: return # Get dependents and invalidate to_invalidate = {document_id} to_invalidate.update( reference_graph.get_transitive_dependents(document_id) ) cache.invalidate_many(to_invalidate) return handler