From 7da77396a909d8a5ad8453982a281c91020b7d4f Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 8 Feb 2026 08:36:50 +0100 Subject: [PATCH] feat(spaces): implement Phase 3 Persistent Transclusion Context Implements persistent transclusion context for Information Spaces: - ScopedVariables: Variable scope layers (request > document > space) - SpaceTransclusionContext: Extends TransclusionContext with DB persistence - CrossSpaceResolver: Resolve references across space boundaries - ReferenceGraph: Track document dependencies for cache invalidation - PersistentReferenceGraph: Repository-backed reference tracking - RenderCache: Cache rendered output with invalidation support - CacheInvalidator: Event-driven cache invalidation using reference graph Key features: - Variable precedence: request overrides document overrides space - Reference tracking during transclusion processing - Transitive dependent calculation for cache invalidation - Event bus integration for automatic invalidation on content changes 47 unit tests covering all components. 
Co-Authored-By: Claude Opus 4.5 --- markitect/spaces/transclusion/__init__.py | 37 +- .../spaces/transclusion/cache_invalidation.py | 397 ++++++++++++ .../spaces/transclusion/persistent_context.py | 369 ++++++++++++ .../spaces/transclusion/reference_graph.py | 336 +++++++++++ tests/unit/spaces/test_transclusion.py | 565 ++++++++++++++++++ 5 files changed, 1702 insertions(+), 2 deletions(-) create mode 100644 markitect/spaces/transclusion/cache_invalidation.py create mode 100644 markitect/spaces/transclusion/persistent_context.py create mode 100644 markitect/spaces/transclusion/reference_graph.py create mode 100644 tests/unit/spaces/test_transclusion.py diff --git a/markitect/spaces/transclusion/__init__.py b/markitect/spaces/transclusion/__init__.py index 62b2a369..ee3f6e8c 100644 --- a/markitect/spaces/transclusion/__init__.py +++ b/markitect/spaces/transclusion/__init__.py @@ -6,7 +6,40 @@ This package extends the existing TransclusionContext with: - Cross-space reference resolution - Reference graph for dependency tracking - Variable scope layers (space, document, request) +- Event-driven cache invalidation """ -# Transclusion extensions will be implemented in Phase 3 -__all__ = [] +from .persistent_context import ( + SpaceTransclusionContext, + ScopedVariables, + VariableScope, + CrossSpaceResolver, +) +from .reference_graph import ( + ReferenceGraph, + PersistentReferenceGraph, + DependencyNode, +) +from .cache_invalidation import ( + RenderCache, + CacheEntry, + CacheInvalidator, + create_invalidation_handler, +) + +__all__ = [ + # Persistent context + "SpaceTransclusionContext", + "ScopedVariables", + "VariableScope", + "CrossSpaceResolver", + # Reference graph + "ReferenceGraph", + "PersistentReferenceGraph", + "DependencyNode", + # Cache invalidation + "RenderCache", + "CacheEntry", + "CacheInvalidator", + "create_invalidation_handler", +] diff --git a/markitect/spaces/transclusion/cache_invalidation.py b/markitect/spaces/transclusion/cache_invalidation.py 
new file mode 100644 index 00000000..ab02595d --- /dev/null +++ b/markitect/spaces/transclusion/cache_invalidation.py @@ -0,0 +1,397 @@ +""" +Cache invalidation for Information Spaces. + +This module provides event-driven cache invalidation that uses +the reference graph to determine which documents need to be +re-rendered when content changes. +""" + +from typing import Set, List, Dict, Any, Optional, Callable +from dataclasses import dataclass, field +import logging + +from ..events import ( + EventBus, + SpaceEvent, + SpaceEventType, + cache_invalidated_event, +) +from .reference_graph import ReferenceGraph, PersistentReferenceGraph + + +logger = logging.getLogger(__name__) + + +@dataclass +class CacheEntry: + """ + Represents a cached rendering result. + + Attributes: + document_id: The document ID + space_id: The space ID + content_hash: Hash of source content when rendered + rendered_content: The rendered output + dependencies: Document IDs this rendering depends on + """ + + document_id: str + space_id: str + content_hash: str + rendered_content: Any + dependencies: Set[str] = field(default_factory=set) + + +class RenderCache: + """ + Cache for rendered document content. + + Stores rendered output keyed by document ID, with automatic + invalidation based on content changes and dependencies. + """ + + def __init__(self): + """Initialize an empty cache.""" + self._cache: Dict[str, CacheEntry] = {} + self._by_space: Dict[str, Set[str]] = {} + + def get(self, document_id: str) -> Optional[CacheEntry]: + """ + Get a cache entry. + + Args: + document_id: The document ID + + Returns: + CacheEntry if found, None otherwise + """ + return self._cache.get(document_id) + + def put( + self, + document_id: str, + space_id: str, + content_hash: str, + rendered_content: Any, + dependencies: Optional[Set[str]] = None, + ) -> CacheEntry: + """ + Store a cache entry. 
+ + Args: + document_id: The document ID + space_id: The space ID + content_hash: Source content hash + rendered_content: Rendered output + dependencies: Document IDs this depends on + + Returns: + The created CacheEntry + """ + entry = CacheEntry( + document_id=document_id, + space_id=space_id, + content_hash=content_hash, + rendered_content=rendered_content, + dependencies=dependencies or set(), + ) + self._cache[document_id] = entry + + if space_id not in self._by_space: + self._by_space[space_id] = set() + self._by_space[space_id].add(document_id) + + return entry + + def invalidate(self, document_id: str) -> bool: + """ + Invalidate a single cache entry. + + Args: + document_id: The document ID + + Returns: + True if entry was invalidated, False if not found + """ + if document_id not in self._cache: + return False + + entry = self._cache.pop(document_id) + if entry.space_id in self._by_space: + self._by_space[entry.space_id].discard(document_id) + + return True + + def invalidate_many(self, document_ids: Set[str]) -> int: + """ + Invalidate multiple cache entries. + + Args: + document_ids: Set of document IDs to invalidate + + Returns: + Number of entries invalidated + """ + count = 0 + for doc_id in document_ids: + if self.invalidate(doc_id): + count += 1 + return count + + def invalidate_space(self, space_id: str) -> int: + """ + Invalidate all entries for a space. + + Args: + space_id: The space ID + + Returns: + Number of entries invalidated + """ + doc_ids = self._by_space.pop(space_id, set()) + count = 0 + for doc_id in doc_ids: + if doc_id in self._cache: + del self._cache[doc_id] + count += 1 + return count + + def is_valid(self, document_id: str, content_hash: str) -> bool: + """ + Check if a cache entry is still valid. 
+ + Args: + document_id: The document ID + content_hash: Current content hash + + Returns: + True if cache entry exists and matches hash + """ + entry = self._cache.get(document_id) + return entry is not None and entry.content_hash == content_hash + + def get_cached_documents(self, space_id: str) -> Set[str]: + """ + Get all cached document IDs for a space. + + Args: + space_id: The space ID + + Returns: + Set of cached document IDs + """ + return self._by_space.get(space_id, set()).copy() + + def clear(self) -> int: + """ + Clear all cache entries. + + Returns: + Number of entries cleared + """ + count = len(self._cache) + self._cache.clear() + self._by_space.clear() + return count + + +class CacheInvalidator: + """ + Event-driven cache invalidation coordinator. + + Listens to document change events and uses the reference graph + to invalidate affected cache entries. + """ + + def __init__( + self, + cache: RenderCache, + reference_graph: ReferenceGraph, + event_bus: Optional[EventBus] = None, + transitive: bool = True, + ): + """ + Initialize the cache invalidator. 
+ + Args: + cache: The render cache to invalidate + reference_graph: Reference graph for dependency tracking + event_bus: Event bus for subscribing to changes + transitive: Whether to invalidate transitive dependents + """ + self._cache = cache + self._reference_graph = reference_graph + self._event_bus = event_bus + self._transitive = transitive + self._handler_ids: List[str] = [] + + if event_bus: + self._subscribe_to_events() + + def _subscribe_to_events(self) -> None: + """Subscribe to relevant events.""" + if not self._event_bus: + return + + # Document content changes trigger invalidation + handler_id = self._event_bus.subscribe( + SpaceEventType.DOCUMENT_CONTENT_CHANGED, + self._on_content_changed, + ) + self._handler_ids.append(handler_id) + + # Document removal invalidates + handler_id = self._event_bus.subscribe( + SpaceEventType.DOCUMENT_REMOVED, + self._on_document_removed, + ) + self._handler_ids.append(handler_id) + + # Space deletion invalidates all + handler_id = self._event_bus.subscribe( + SpaceEventType.SPACE_DELETED, + self._on_space_deleted, + ) + self._handler_ids.append(handler_id) + + def unsubscribe(self) -> None: + """Unsubscribe from all events.""" + if not self._event_bus: + return + + for handler_id in self._handler_ids: + self._event_bus.unsubscribe_by_id(handler_id) + self._handler_ids.clear() + + def _on_content_changed(self, event: SpaceEvent) -> None: + """Handle document content change event.""" + document_id = event.payload.get("document_id") + if not document_id: + return + + invalidated = self.invalidate_for_document(document_id, event.space_id) + + logger.debug( + f"Content changed for {document_id}, invalidated {len(invalidated)} documents" + ) + + def _on_document_removed(self, event: SpaceEvent) -> None: + """Handle document removal event.""" + document_id = event.payload.get("document_id") + if not document_id: + return + + # Invalidate this document and dependents + invalidated = self.invalidate_for_document(document_id, 
event.space_id) + + # Remove from reference graph + self._reference_graph.remove_document(document_id) + + logger.debug( + f"Document {document_id} removed, invalidated {len(invalidated)} documents" + ) + + def _on_space_deleted(self, event: SpaceEvent) -> None: + """Handle space deletion event.""" + space_id = event.space_id + + # Invalidate all cached documents in space + count = self._cache.invalidate_space(space_id) + + # Clear reference graph + self._reference_graph.clear_space(space_id) + + logger.debug(f"Space {space_id} deleted, invalidated {count} documents") + + def invalidate_for_document( + self, + document_id: str, + space_id: str, + ) -> Set[str]: + """ + Invalidate cache for a document and its dependents. + + Args: + document_id: The changed document + space_id: The space ID + + Returns: + Set of invalidated document IDs + """ + to_invalidate = {document_id} + + # Get dependents (documents that include this one) + if self._transitive: + dependents = self._reference_graph.get_transitive_dependents(document_id) + else: + dependents = self._reference_graph.get_dependents(document_id) + + to_invalidate.update(dependents) + + # Invalidate cache entries + self._cache.invalidate_many(to_invalidate) + + # Emit cache invalidation event if we have event bus + if self._event_bus and to_invalidate: + event = cache_invalidated_event( + space_id=space_id, + document_ids=list(to_invalidate), + reason="content_changed", + ) + self._event_bus.emit(event) + + return to_invalidate + + def invalidate_all(self, space_id: str) -> int: + """ + Invalidate all cache entries for a space. 
+ + Args: + space_id: The space ID + + Returns: + Number of entries invalidated + """ + cached_docs = self._cache.get_cached_documents(space_id) + count = self._cache.invalidate_space(space_id) + + if self._event_bus and count > 0: + event = cache_invalidated_event( + space_id=space_id, + document_ids=list(cached_docs), + reason="full_invalidation", + ) + self._event_bus.emit(event) + + return count + + +def create_invalidation_handler( + cache: RenderCache, + reference_graph: ReferenceGraph, +) -> Callable[[SpaceEvent], None]: + """ + Create a standalone event handler for cache invalidation. + + Useful when you want to add invalidation without the full CacheInvalidator. + + Args: + cache: The render cache + reference_graph: The reference graph + + Returns: + Event handler function + """ + def handler(event: SpaceEvent) -> None: + document_id = event.payload.get("document_id") + if not document_id: + return + + # Get dependents and invalidate + to_invalidate = {document_id} + to_invalidate.update( + reference_graph.get_transitive_dependents(document_id) + ) + cache.invalidate_many(to_invalidate) + + return handler diff --git a/markitect/spaces/transclusion/persistent_context.py b/markitect/spaces/transclusion/persistent_context.py new file mode 100644 index 00000000..a147c457 --- /dev/null +++ b/markitect/spaces/transclusion/persistent_context.py @@ -0,0 +1,369 @@ +""" +Persistent transclusion context for Information Spaces. + +This module extends the core TransclusionContext with database-backed +variable storage and space integration. 
+""" + +from pathlib import Path +from typing import Dict, Any, Optional, List, Set +from dataclasses import dataclass, field + +from markitect.packaging.transclusion.context import TransclusionContext +from ..models import SpaceVariable +from ..repositories.interfaces import IVariableRepository + + +class VariableScope: + """Defines variable scope levels in order of precedence (highest first).""" + + REQUEST = "request" # Temporary, per-request variables + DOCUMENT = "document" # Document-level variables + SPACE = "space" # Space-level variables (persisted) + + +@dataclass +class ScopedVariables: + """ + Manages variables across multiple scope layers. + + Variables are resolved in order: request > document > space + This allows local overrides of space-level defaults. + """ + + space_vars: Dict[str, Any] = field(default_factory=dict) + document_vars: Dict[str, Any] = field(default_factory=dict) + request_vars: Dict[str, Any] = field(default_factory=dict) + + def get(self, name: str, default: Any = None) -> Any: + """ + Get a variable, checking scopes in order of precedence. + + Args: + name: Variable name + default: Default if not found in any scope + + Returns: + Variable value from highest precedence scope, or default + """ + if name in self.request_vars: + return self.request_vars[name] + if name in self.document_vars: + return self.document_vars[name] + if name in self.space_vars: + return self.space_vars[name] + return default + + def set(self, name: str, value: Any, scope: str = VariableScope.REQUEST) -> None: + """ + Set a variable in the specified scope. 
+ + Args: + name: Variable name + value: Variable value + scope: Target scope (request, document, or space) + """ + if scope == VariableScope.REQUEST: + self.request_vars[name] = value + elif scope == VariableScope.DOCUMENT: + self.document_vars[name] = value + elif scope == VariableScope.SPACE: + self.space_vars[name] = value + + def get_all(self) -> Dict[str, Any]: + """ + Get all variables merged with proper precedence. + + Returns: + Dictionary with all variables, higher scopes overriding lower + """ + merged = {} + merged.update(self.space_vars) + merged.update(self.document_vars) + merged.update(self.request_vars) + return merged + + def clear_scope(self, scope: str) -> None: + """Clear all variables in a scope.""" + if scope == VariableScope.REQUEST: + self.request_vars.clear() + elif scope == VariableScope.DOCUMENT: + self.document_vars.clear() + elif scope == VariableScope.SPACE: + self.space_vars.clear() + + +class SpaceTransclusionContext(TransclusionContext): + """ + Transclusion context integrated with Information Spaces. + + Extends the base TransclusionContext with: + - Space-aware variable resolution with scope layers + - Reference tracking for cache invalidation + - Optional persistence of space-level variables + """ + + def __init__( + self, + space_id: str, + base_path: Optional[Path] = None, + variables: Optional[Dict[str, Any]] = None, + max_depth: int = 10, + variable_repo: Optional[IVariableRepository] = None, + ): + """ + Initialize a space-aware transclusion context. 
+ + Args: + space_id: The space ID this context belongs to + base_path: Base path for relative file resolution + variables: Initial request-level variables + max_depth: Maximum inclusion depth + variable_repo: Optional repository for persisting space variables + """ + # Initialize scoped vars BEFORE super().__init__() because + # the base class sets self.variables which triggers our property setter + self._scoped_vars = ScopedVariables() + self.space_id = space_id + self._variable_repo = variable_repo + + # Track references during processing + self._current_document_id: Optional[str] = None + self._references: List[tuple] = [] # (source_doc_id, target_doc_id) + + # Now call parent init (which may set variables via property) + super().__init__(base_path=base_path, variables={}, max_depth=max_depth) + + # Load space variables from repository if available + if variable_repo: + self._load_space_variables() + + # Set initial request variables + if variables: + for name, value in variables.items(): + self._scoped_vars.set(name, value, VariableScope.REQUEST) + + def _load_space_variables(self) -> None: + """Load space-level variables from the repository.""" + if not self._variable_repo: + return + + space_vars = self._variable_repo.list_variables(self.space_id, scope="space") + for var in space_vars: + self._scoped_vars.set(var.name, var.value, VariableScope.SPACE) + + doc_vars = self._variable_repo.list_variables(self.space_id, scope="document") + for var in doc_vars: + self._scoped_vars.set(var.name, var.value, VariableScope.DOCUMENT) + + def set_current_document(self, document_id: str) -> None: + """ + Set the current document being processed. 
+ + Args: + document_id: The document ID + """ + self._current_document_id = document_id + + def get_current_document(self) -> Optional[str]: + """Get the current document being processed.""" + return self._current_document_id + + def track_reference(self, target_doc_id: str) -> None: + """ + Track a reference from current document to target. + + Args: + target_doc_id: The document being referenced + """ + if self._current_document_id: + self._references.append((self._current_document_id, target_doc_id)) + + def get_tracked_references(self) -> List[tuple]: + """ + Get all tracked references. + + Returns: + List of (source_doc_id, target_doc_id) tuples + """ + return list(self._references) + + def clear_tracked_references(self) -> None: + """Clear all tracked references.""" + self._references.clear() + + # Override variable methods to use scoped storage + + def set_variable(self, name: str, value: Any, scope: str = VariableScope.REQUEST) -> None: + """ + Set a variable in the specified scope. + + Args: + name: Variable name + value: Variable value + scope: Variable scope (request, document, or space) + """ + self._scoped_vars.set(name, value, scope) + + # Persist space-level variables if repository available + if scope == VariableScope.SPACE and self._variable_repo: + var = SpaceVariable( + space_id=self.space_id, + name=name, + value=value, + scope=scope, + ) + self._variable_repo.set_variable(var) + + def get_variable(self, name: str, default: Any = None) -> Any: + """ + Get a variable from the scoped storage. + + Args: + name: Variable name + default: Default value if not found + + Returns: + Variable value or default + """ + return self._scoped_vars.get(name, default) + + def substitute_variables(self, text: str) -> str: + """ + Substitute variables in text using scoped variable resolution. 
+ + Args: + text: Text containing {{variable}} references + + Returns: + Text with variables substituted + """ + import re + + def replace_var(match): + var_name = match.group(1).strip() + value = self._scoped_vars.get(var_name) + return str(value) if value is not None else match.group(0) + + return re.sub(r'\{\{([^}]+)\}\}', replace_var, text) + + @property + def variables(self) -> Dict[str, Any]: + """Get all variables merged with proper precedence.""" + return self._scoped_vars.get_all() + + @variables.setter + def variables(self, value: Dict[str, Any]) -> None: + """Set request-level variables.""" + self._scoped_vars.request_vars = value + + def create_child_context( + self, new_base_path: Optional[Path] = None + ) -> "SpaceTransclusionContext": + """ + Create a child context for nested processing. + + Args: + new_base_path: New base path for the child context + + Returns: + New SpaceTransclusionContext with inherited state + """ + child = SpaceTransclusionContext( + space_id=self.space_id, + base_path=new_base_path or self.base_path, + max_depth=self.max_depth, + variable_repo=self._variable_repo, + ) + + # Copy scoped variables + child._scoped_vars.space_vars = self._scoped_vars.space_vars.copy() + child._scoped_vars.document_vars = self._scoped_vars.document_vars.copy() + child._scoped_vars.request_vars = self._scoped_vars.request_vars.copy() + + # Copy processing state + child.current_depth = self.current_depth + child.inclusion_stack = self.inclusion_stack.copy() + child.processed_files = self.processed_files.copy() + + # Share reference tracking + child._current_document_id = self._current_document_id + child._references = self._references # Shared list + + return child + + +class CrossSpaceResolver: + """ + Resolves references across space boundaries. + + Enables transclusion from one space to reference content in another space. + """ + + def __init__(self, contexts: Dict[str, SpaceTransclusionContext]): + """ + Initialize the cross-space resolver. 
+ + Args: + contexts: Dictionary mapping space_id to SpaceTransclusionContext + """ + self._contexts = contexts + + def add_context(self, space_id: str, context: SpaceTransclusionContext) -> None: + """Add a space context.""" + self._contexts[space_id] = context + + def get_context(self, space_id: str) -> Optional[SpaceTransclusionContext]: + """Get the context for a space.""" + return self._contexts.get(space_id) + + def resolve_variable( + self, + space_id: str, + var_name: str, + default: Any = None, + ) -> Any: + """ + Resolve a variable from a specific space. + + Args: + space_id: The space to look up + var_name: Variable name + default: Default value if not found + + Returns: + Variable value or default + """ + context = self._contexts.get(space_id) + if context: + return context.get_variable(var_name, default) + return default + + def resolve_cross_space_reference( + self, + reference: str, + current_space_id: str, + ) -> Optional[tuple]: + """ + Parse and resolve a cross-space reference. + + Reference format: "space:other-space/path/to/doc.md" + or just "path/to/doc.md" for current space. + + Args: + reference: The reference string + current_space_id: Current space ID for relative references + + Returns: + Tuple of (space_id, path) or None if invalid + """ + if ":" in reference and reference.startswith("space:"): + # Cross-space reference + _, rest = reference.split(":", 1) + if "/" in rest: + space_name, path = rest.split("/", 1) + return (space_name, "/" + path) + return None + else: + # Same-space reference + return (current_space_id, reference) diff --git a/markitect/spaces/transclusion/reference_graph.py b/markitect/spaces/transclusion/reference_graph.py new file mode 100644 index 00000000..47a0a4f5 --- /dev/null +++ b/markitect/spaces/transclusion/reference_graph.py @@ -0,0 +1,336 @@ +""" +Reference graph for transclusion dependency tracking. 
+ +This module provides a graph-based system for tracking which documents +reference which other documents, enabling efficient cache invalidation. +""" + +from collections import defaultdict +from typing import Dict, List, Set, Optional +from dataclasses import dataclass, field + +from ..models import TransclusionReference +from ..repositories.interfaces import IReferenceRepository + + +@dataclass +class DependencyNode: + """ + A node in the dependency graph representing a document. + + Tracks both outgoing references (what this document includes) + and incoming references (what includes this document). + """ + + document_id: str + space_id: str + # Documents this document references (includes) + references: Set[str] = field(default_factory=set) + # Documents that reference (include) this document + dependents: Set[str] = field(default_factory=set) + + +class ReferenceGraph: + """ + In-memory graph of document dependencies. + + Used for efficient cache invalidation by tracking which documents + depend on which other documents. + """ + + def __init__(self): + """Initialize an empty reference graph.""" + # Map of document_id -> DependencyNode + self._nodes: Dict[str, DependencyNode] = {} + # Map of space_id -> set of document_ids in that space + self._spaces: Dict[str, Set[str]] = defaultdict(set) + + def _get_or_create_node(self, document_id: str, space_id: str) -> DependencyNode: + """Get or create a node for a document.""" + if document_id not in self._nodes: + self._nodes[document_id] = DependencyNode( + document_id=document_id, + space_id=space_id, + ) + self._spaces[space_id].add(document_id) + return self._nodes[document_id] + + def add_reference( + self, + source_doc_id: str, + target_doc_id: str, + space_id: str, + ) -> None: + """ + Add a reference from source to target document. 
+ + Args: + source_doc_id: The document doing the including + target_doc_id: The document being included + space_id: The space ID + """ + source_node = self._get_or_create_node(source_doc_id, space_id) + target_node = self._get_or_create_node(target_doc_id, space_id) + + source_node.references.add(target_doc_id) + target_node.dependents.add(source_doc_id) + + def remove_reference( + self, + source_doc_id: str, + target_doc_id: str, + ) -> None: + """ + Remove a reference from source to target. + + Args: + source_doc_id: The source document + target_doc_id: The target document + """ + if source_doc_id in self._nodes: + self._nodes[source_doc_id].references.discard(target_doc_id) + if target_doc_id in self._nodes: + self._nodes[target_doc_id].dependents.discard(source_doc_id) + + def clear_references_from(self, source_doc_id: str) -> List[str]: + """ + Clear all references from a source document. + + Args: + source_doc_id: The source document + + Returns: + List of target document IDs that were referenced + """ + if source_doc_id not in self._nodes: + return [] + + node = self._nodes[source_doc_id] + targets = list(node.references) + + # Remove from all targets' dependent lists + for target_id in targets: + if target_id in self._nodes: + self._nodes[target_id].dependents.discard(source_doc_id) + + node.references.clear() + return targets + + def get_references(self, document_id: str) -> Set[str]: + """ + Get all documents referenced by a document. + + Args: + document_id: The document ID + + Returns: + Set of referenced document IDs + """ + if document_id not in self._nodes: + return set() + return self._nodes[document_id].references.copy() + + def get_dependents(self, document_id: str) -> Set[str]: + """ + Get all documents that depend on (reference) a document. 
+ + Args: + document_id: The document ID + + Returns: + Set of dependent document IDs + """ + if document_id not in self._nodes: + return set() + return self._nodes[document_id].dependents.copy() + + def get_transitive_dependents(self, document_id: str) -> Set[str]: + """ + Get all documents that directly or indirectly depend on a document. + + Performs a breadth-first traversal of the dependency graph. + + Args: + document_id: The document ID + + Returns: + Set of all transitive dependent document IDs + """ + result = set() + to_visit = list(self.get_dependents(document_id)) + visited = {document_id} + + while to_visit: + current = to_visit.pop(0) + if current in visited: + continue + visited.add(current) + result.add(current) + + # Add this document's dependents to visit list + for dependent in self.get_dependents(current): + if dependent not in visited: + to_visit.append(dependent) + + return result + + def get_documents_in_space(self, space_id: str) -> Set[str]: + """ + Get all document IDs tracked in a space. + + Args: + space_id: The space ID + + Returns: + Set of document IDs + """ + return self._spaces.get(space_id, set()).copy() + + def remove_document(self, document_id: str) -> None: + """ + Remove a document and all its references from the graph. 
+ + Args: + document_id: The document ID to remove + """ + if document_id not in self._nodes: + return + + node = self._nodes[document_id] + + # Remove this document from all its targets' dependent lists + for target_id in node.references: + if target_id in self._nodes: + self._nodes[target_id].dependents.discard(document_id) + + # Remove this document from all its dependents' reference lists + for dependent_id in node.dependents: + if dependent_id in self._nodes: + self._nodes[dependent_id].references.discard(document_id) + + # Remove from space tracking + self._spaces[node.space_id].discard(document_id) + + # Delete the node + del self._nodes[document_id] + + def clear_space(self, space_id: str) -> None: + """ + Clear all references for documents in a space. + + Args: + space_id: The space ID + """ + doc_ids = list(self._spaces.get(space_id, set())) + for doc_id in doc_ids: + self.remove_document(doc_id) + + +class PersistentReferenceGraph(ReferenceGraph): + """ + Reference graph backed by persistent storage. + + Extends ReferenceGraph to persist references to a repository, + enabling cache invalidation across restarts. + """ + + def __init__( + self, + space_id: str, + reference_repo: IReferenceRepository, + load_on_init: bool = True, + ): + """ + Initialize a persistent reference graph. 
+
+        Args:
+            space_id: The space ID
+            reference_repo: Repository for persisting references
+            load_on_init: Whether to load existing references on init
+        """
+        super().__init__()
+        self.space_id = space_id
+        self._reference_repo = reference_repo
+
+        if load_on_init:
+            self._load_from_repository()
+
+    def _load_from_repository(self) -> None:
+        """Load all references from the repository (currently a no-op)."""
+        # TODO: implement once the reference repository can list every
+        # reference in a space; until then the in-memory graph is only
+        # populated incrementally through add_reference().
+        pass  # Repository doesn't have a list_all method, would need to enhance
+
+    def add_reference(
+        self,
+        source_doc_id: str,
+        target_doc_id: str,
+        space_id: Optional[str] = None,
+    ) -> None:
+        """
+        Add a reference and persist it.
+
+        Args:
+            source_doc_id: Source document ID
+            target_doc_id: Target document ID
+            space_id: Optional space ID override
+        """
+        space = space_id or self.space_id
+
+        # Update in-memory graph
+        super().add_reference(source_doc_id, target_doc_id, space)
+
+        # Persist to repository
+        ref = TransclusionReference(
+            source_doc_id=source_doc_id,
+            target_doc_id=target_doc_id,
+            space_id=space,
+        )
+        self._reference_repo.add_reference(ref)
+
+    def clear_references_from(self, source_doc_id: str) -> List[str]:
+        """
+        Clear references from source and persist.
+
+        Args:
+            source_doc_id: Source document ID
+
+        Returns:
+            List of cleared target document IDs
+        """
+        # Clear from in-memory graph
+        targets = super().clear_references_from(source_doc_id)
+
+        # Clear from repository
+        self._reference_repo.clear_references_from(source_doc_id, self.space_id)
+
+        return targets
+
+    def get_dependents_from_repo(self, document_id: str) -> List[str]:
+        """
+        Get dependents directly from repository.
+
+        Useful when graph may not be fully loaded.
+
+        Args:
+            document_id: The document ID
+
+        Returns:
+            List of dependent document IDs
+        """
+        return self._reference_repo.get_dependents(document_id, self.space_id)
+
+    def sync_with_repository(self) -> None:
+        """
+        Sync in-memory graph with repository.
+
+        Useful after batch operations or to ensure consistency.
+        """
+        # The in-memory graph is intentionally NOT cleared here. The
+        # repository cannot list all references in a space yet, so a clear
+        # would drop edges that nothing restores, and dependents of a changed
+        # document would no longer be found. Reloading is delegated to
+        # _load_from_repository(), which is still a no-op; until it is
+        # implemented the graph keeps being built incrementally.
+        self._load_from_repository()
diff --git a/tests/unit/spaces/test_transclusion.py b/tests/unit/spaces/test_transclusion.py
new file mode 100644
index 00000000..8bdd73a3
--- /dev/null
+++ b/tests/unit/spaces/test_transclusion.py
@@ -0,0 +1,565 @@
+"""
+Unit tests for transclusion context and cache invalidation.
+
+Tests the Phase 3 components:
+- SpaceTransclusionContext with scoped variables
+- ReferenceGraph for dependency tracking
+- RenderCache and CacheInvalidator
+"""
+
+import pytest
+from pathlib import Path
+
+from markitect.spaces.transclusion import (
+    SpaceTransclusionContext,
+    ScopedVariables,
+    VariableScope,
+    CrossSpaceResolver,
+    ReferenceGraph,
+    DependencyNode,
+    RenderCache,
+    CacheEntry,
+    CacheInvalidator,
+)
+from markitect.spaces.events import EventBus, SpaceEventType
+
+
+class TestScopedVariables:
+    """Tests for ScopedVariables."""
+
+    def test_empty_variables(self):
+        """Test empty scoped variables."""
+        scoped = ScopedVariables()
+        assert scoped.get("foo") is None
+        assert scoped.get("foo", "default") == "default"
+
+    def test_set_and_get_request_scope(self):
+        """Test request scope variables."""
+        scoped = ScopedVariables()
+        scoped.set("key", "request_value", VariableScope.REQUEST)
+        assert scoped.get("key") == "request_value"
+
+    def test_set_and_get_document_scope(self):
+        """Test document scope variables."""
+        scoped = ScopedVariables()
+        scoped.set("key", "doc_value", VariableScope.DOCUMENT)
+        assert scoped.get("key") == "doc_value"
+
+    def test_set_and_get_space_scope(self):
+        """Test space scope variables."""
+        scoped = ScopedVariables()
+        scoped.set("key", "space_value", VariableScope.SPACE)
+        assert scoped.get("key") == "space_value"
+
+    def test_scope_precedence(self):
+        """Test that higher scopes override lower scopes."""
+        scoped = ScopedVariables()
+        scoped.set("key", "space_value", VariableScope.SPACE)
+        scoped.set("key", "doc_value", VariableScope.DOCUMENT)
+        scoped.set("key", "request_value", VariableScope.REQUEST)
+
+        # Request scope wins
+        assert scoped.get("key") == "request_value"
+
+    def test_scope_fallback(self):
+        """Test fallback to lower scopes."""
+        scoped = ScopedVariables()
+        scoped.set("space_only", "from_space", VariableScope.SPACE)
+        scoped.set("doc_only", "from_doc", VariableScope.DOCUMENT)
+
+        assert scoped.get("space_only") == "from_space"
+        assert scoped.get("doc_only") == "from_doc"
+
+    def test_get_all(self):
+        """Test getting all merged variables."""
+        scoped = ScopedVariables()
+        scoped.set("a", "space_a", VariableScope.SPACE)
+        scoped.set("b", "doc_b", VariableScope.DOCUMENT)
+        scoped.set("c", "req_c", VariableScope.REQUEST)
+        scoped.set("a", "req_a", VariableScope.REQUEST)  # Override
+
+        all_vars = scoped.get_all()
+        assert all_vars["a"] == "req_a"  # Request wins
+        assert all_vars["b"] == "doc_b"
+        assert all_vars["c"] == "req_c"
+
+    def test_clear_scope(self):
+        """Test clearing a specific scope."""
+        scoped = ScopedVariables()
+        scoped.set("key", "space_value", VariableScope.SPACE)
+        scoped.set("key", "request_value", VariableScope.REQUEST)
+
+        scoped.clear_scope(VariableScope.REQUEST)
+
+        # Now should fall back to space
+        assert scoped.get("key") == "space_value"
+
+
+class TestSpaceTransclusionContext:
+    """Tests for SpaceTransclusionContext."""
+
+    def test_basic_creation(self):
+        """Test basic context creation."""
+        ctx = SpaceTransclusionContext(
+            space_id="space-1",
+            base_path=Path("/test"),
+        )
+        assert ctx.space_id == "space-1"
+        assert ctx.base_path == Path("/test")
+
+    def test_initial_variables(self):
+        """Test context with initial variables."""
+        ctx = SpaceTransclusionContext(
+            space_id="space-1",
+            variables={"version": "1.0", "api_url": "https://api.example.com"},
+        )
+        assert ctx.get_variable("version") == "1.0"
+        assert ctx.get_variable("api_url") == "https://api.example.com"
+
+    def test_set_variable_with_scope(self):
+        """Test setting variables with different scopes."""
+        ctx = SpaceTransclusionContext(space_id="space-1")
+
+        ctx.set_variable("global", "g", VariableScope.SPACE)
+        ctx.set_variable("local", "l", VariableScope.REQUEST)
+
+        assert ctx.get_variable("global") == "g"
+        assert ctx.get_variable("local") == "l"
+
+    def test_variable_substitution(self):
+        """Test variable substitution in text."""
+        ctx = SpaceTransclusionContext(
+            space_id="space-1",
+            variables={"name": "John", "version": "2.0"},
+        )
+
+        result = ctx.substitute_variables("Hello {{name}}, welcome to v{{version}}")
+        assert result == "Hello John, welcome to v2.0"
+
+    def test_variable_substitution_missing(self):
+        """Test that missing variables are left unchanged."""
+        ctx = SpaceTransclusionContext(space_id="space-1")
+
+        result = ctx.substitute_variables("Hello {{missing}}")
+        assert result == "Hello {{missing}}"
+
+    def test_reference_tracking(self):
+        """Test reference tracking during processing."""
+        ctx = SpaceTransclusionContext(space_id="space-1")
+
+        ctx.set_current_document("doc-1")
+        ctx.track_reference("component-a")
+        ctx.track_reference("component-b")
+
+        refs = ctx.get_tracked_references()
+        assert len(refs) == 2
+        assert ("doc-1", "component-a") in refs
+        assert ("doc-1", "component-b") in refs
+
+    def test_clear_tracked_references(self):
+        """Test clearing tracked references."""
+        ctx = SpaceTransclusionContext(space_id="space-1")
+        ctx.set_current_document("doc-1")
+        ctx.track_reference("target")
+
+        ctx.clear_tracked_references()
+
+        assert len(ctx.get_tracked_references()) == 0
+
+    def test_create_child_context(self):
+        """Test creating a child context."""
+        parent = SpaceTransclusionContext(
+            space_id="space-1",
+            variables={"inherited": "value"},
+        )
+        parent.set_variable("space_var", "sv", VariableScope.SPACE)
+
+        child = parent.create_child_context(new_base_path=Path("/child"))
+
+        assert child.space_id == "space-1"
+        assert child.base_path == Path("/child")
+        assert child.get_variable("inherited") == "value"
+        assert child.get_variable("space_var") == "sv"
+
+    def test_child_context_shares_references(self):
+        """Test that child context shares reference tracking."""
+        parent = SpaceTransclusionContext(space_id="space-1")
+        parent.set_current_document("doc-1")
+
+        child = parent.create_child_context()
+        child.track_reference("from-child")
+
+        # Parent should see the reference
+        refs = parent.get_tracked_references()
+        assert ("doc-1", "from-child") in refs
+
+    def test_variables_property(self):
+        """Test the variables property returns merged dict."""
+        ctx = SpaceTransclusionContext(space_id="space-1")
+        ctx.set_variable("a", "1", VariableScope.SPACE)
+        ctx.set_variable("b", "2", VariableScope.REQUEST)
+
+        vars_dict = ctx.variables
+        assert vars_dict["a"] == "1"
+        assert vars_dict["b"] == "2"
+
+
+class TestCrossSpaceResolver:
+    """Tests for CrossSpaceResolver."""
+
+    def test_add_and_get_context(self):
+        """Test adding and getting contexts."""
+        resolver = CrossSpaceResolver({})
+
+        ctx1 = SpaceTransclusionContext(space_id="space-1")
+        resolver.add_context("space-1", ctx1)
+
+        assert resolver.get_context("space-1") is ctx1
+        assert resolver.get_context("space-2") is None
+
+    def test_resolve_variable_from_space(self):
+        """Test resolving variables across spaces."""
+        ctx1 = SpaceTransclusionContext(
+            space_id="space-1",
+            variables={"api_key": "key-1"},
+        )
+        ctx2 = SpaceTransclusionContext(
+            space_id="space-2",
+            variables={"api_key": "key-2"},
+        )
+
+        resolver = CrossSpaceResolver({"space-1": ctx1, "space-2": ctx2})
+
+        assert resolver.resolve_variable("space-1", "api_key") == "key-1"
+        assert resolver.resolve_variable("space-2", "api_key") == "key-2"
+        assert resolver.resolve_variable("space-3", "api_key") is None
+
+    def test_resolve_cross_space_reference(self):
+        """Test parsing cross-space references."""
+        resolver = CrossSpaceResolver({})
+
+        # Cross-space reference
+        result = resolver.resolve_cross_space_reference(
+            "space:other-space/docs/intro.md", "current-space"
+        )
+        assert result == ("other-space", "/docs/intro.md")
+
+        # Same-space reference
+        result = resolver.resolve_cross_space_reference(
+            "/docs/intro.md", "current-space"
+        )
+        assert result == ("current-space", "/docs/intro.md")
+
+
+class TestReferenceGraph:
+    """Tests for ReferenceGraph."""
+
+    def test_empty_graph(self):
+        """Test empty reference graph."""
+        graph = ReferenceGraph()
+        assert graph.get_references("doc-1") == set()
+        assert graph.get_dependents("doc-1") == set()
+
+    def test_add_reference(self):
+        """Test adding a reference."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "component-a", "space-1")
+
+        assert "component-a" in graph.get_references("doc-1")
+        assert "doc-1" in graph.get_dependents("component-a")
+
+    def test_multiple_references(self):
+        """Test multiple references from one document."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "comp-a", "space-1")
+        graph.add_reference("doc-1", "comp-b", "space-1")
+
+        refs = graph.get_references("doc-1")
+        assert refs == {"comp-a", "comp-b"}
+
+    def test_multiple_dependents(self):
+        """Test multiple documents depending on one."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "shared", "space-1")
+        graph.add_reference("doc-2", "shared", "space-1")
+        graph.add_reference("doc-3", "shared", "space-1")
+
+        deps = graph.get_dependents("shared")
+        assert deps == {"doc-1", "doc-2", "doc-3"}
+
+    def test_remove_reference(self):
+        """Test removing a reference."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "target", "space-1")
+        graph.remove_reference("doc-1", "target")
+
+        assert graph.get_references("doc-1") == set()
+        assert graph.get_dependents("target") == set()
+
+    def test_clear_references_from(self):
+        """Test clearing all references from a document."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "a", "space-1")
+        graph.add_reference("doc-1", "b", "space-1")
+        graph.add_reference("doc-1", "c", "space-1")
+
+        targets = graph.clear_references_from("doc-1")
+
+        assert set(targets) == {"a", "b", "c"}
+        assert graph.get_references("doc-1") == set()
+
+    def test_transitive_dependents(self):
+        """Test getting transitive dependents."""
+        graph = ReferenceGraph()
+        # doc-1 -> shared
+        # doc-2 -> doc-1
+        # doc-3 -> doc-2
+        graph.add_reference("doc-1", "shared", "space-1")
+        graph.add_reference("doc-2", "doc-1", "space-1")
+        graph.add_reference("doc-3", "doc-2", "space-1")
+
+        # Transitive dependents of shared
+        deps = graph.get_transitive_dependents("shared")
+        assert deps == {"doc-1", "doc-2", "doc-3"}
+
+    def test_transitive_dependents_with_cycle(self):
+        """Test transitive dependents handles cycles gracefully."""
+        graph = ReferenceGraph()
+        # Create a cycle: a -> b -> c -> a
+        graph.add_reference("a", "b", "space-1")
+        graph.add_reference("b", "c", "space-1")
+        graph.add_reference("c", "a", "space-1")
+
+        # Should not infinite loop; a -> b directly, c -> a -> b transitively
+        deps = graph.get_transitive_dependents("b")
+        assert {"a", "c"} <= deps
+
+    def test_get_documents_in_space(self):
+        """Test getting all documents in a space."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "doc-2", "space-1")
+        graph.add_reference("doc-3", "doc-4", "space-1")
+        graph.add_reference("other", "doc", "space-2")
+
+        space1_docs = graph.get_documents_in_space("space-1")
+        assert space1_docs == {"doc-1", "doc-2", "doc-3", "doc-4"}
+
+    def test_remove_document(self):
+        """Test removing a document from the graph."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "shared", "space-1")
+        graph.add_reference("doc-2", "shared", "space-1")
+
+        graph.remove_document("doc-1")
+
+        # doc-1's references should be gone
+        assert "doc-1" not in graph.get_dependents("shared")
+        # doc-2's references should remain
+        assert "doc-2" in graph.get_dependents("shared")
+
+    def test_clear_space(self):
+        """Test clearing all documents in a space."""
+        graph = ReferenceGraph()
+        graph.add_reference("doc-1", "doc-2", "space-1")
+        graph.add_reference("doc-3", "doc-4", "space-1")
+        graph.add_reference("other", "doc", "space-2")
+
+        graph.clear_space("space-1")
+
+        assert graph.get_documents_in_space("space-1") == set()
+        assert graph.get_documents_in_space("space-2") == {"other", "doc"}
+
+
+class TestRenderCache:
+    """Tests for RenderCache."""
+
+    def test_empty_cache(self):
+        """Test empty cache."""
+        cache = RenderCache()
+        assert cache.get("doc-1") is None
+
+    def test_put_and_get(self):
+        """Test putting and getting cache entries."""
+        cache = RenderCache()
+        cache.put("doc-1", "space-1", "hash123", "content")
+
+        retrieved = cache.get("doc-1")
+        assert retrieved is not None
+        assert retrieved.rendered_content == "content"
+        assert retrieved.content_hash == "hash123"
+
+    def test_is_valid(self):
+        """Test validity checking."""
+        cache = RenderCache()
+        cache.put("doc-1", "space-1", "hash123", "content")
+
+        assert cache.is_valid("doc-1", "hash123") is True
+        assert cache.is_valid("doc-1", "different_hash") is False
+        assert cache.is_valid("non-existent", "hash123") is False
+
+    def test_invalidate(self):
+        """Test invalidating a cache entry."""
+        cache = RenderCache()
+        cache.put("doc-1", "space-1", "hash", "content")
+
+        result = cache.invalidate("doc-1")
+        assert result is True
+        assert cache.get("doc-1") is None
+
+    def test_invalidate_nonexistent(self):
+        """Test invalidating non-existent entry."""
+        cache = RenderCache()
+        result = cache.invalidate("non-existent")
+        assert result is False
+
+    def test_invalidate_many(self):
+        """Test invalidating multiple entries."""
+        cache = RenderCache()
+        cache.put("doc-1", "space-1", "h1", "c1")
+        cache.put("doc-2", "space-1", "h2", "c2")
+        cache.put("doc-3", "space-1", "h3", "c3")
+
+        count = cache.invalidate_many({"doc-1", "doc-2", "doc-4"})
+        assert count == 2
+        assert cache.get("doc-1") is None
+        assert cache.get("doc-2") is None
+        assert cache.get("doc-3") is not None
+
+    def test_invalidate_space(self):
+        """Test invalidating all entries in a space."""
+        cache = RenderCache()
+        cache.put("doc-1", "space-1", "h1", "c1")
+        cache.put("doc-2", "space-1", "h2", "c2")
+        cache.put("doc-3", "space-2", "h3", "c3")
+
+        count = cache.invalidate_space("space-1")
+        assert count == 2
+        assert cache.get("doc-1") is None
+        assert cache.get("doc-2") is None
+        assert cache.get("doc-3") is not None
+
+    def test_get_cached_documents(self):
+        """Test getting cached document IDs for a space."""
+        cache = RenderCache()
+        cache.put("doc-1", "space-1", "h1", "c1")
+        cache.put("doc-2", "space-1", "h2", "c2")
+        cache.put("doc-3", "space-2", "h3", "c3")
+
+        docs = cache.get_cached_documents("space-1")
+        assert docs == {"doc-1", "doc-2"}
+
+    def test_clear(self):
+        """Test clearing all cache entries."""
+        cache = RenderCache()
+        cache.put("doc-1", "space-1", "h1", "c1")
+        cache.put("doc-2", "space-2", "h2", "c2")
+
+        count = cache.clear()
+        assert count == 2
+        assert cache.get("doc-1") is None
+        assert cache.get("doc-2") is None
+
+    def test_cache_with_dependencies(self):
+        """Test cache entry with dependencies."""
+        cache = RenderCache()
+        entry = cache.put(
+            "doc-1",
+            "space-1",
+            "hash",
+            "content",
+            dependencies={"comp-a", "comp-b"},
+        )
+
+        assert entry.dependencies == {"comp-a", "comp-b"}
+
+
+class TestCacheInvalidator:
+    """Tests for CacheInvalidator."""
+
+    def test_basic_invalidation(self):
+        """Test basic document invalidation."""
+        cache = RenderCache()
+        graph = ReferenceGraph()
+
+        cache.put("doc-1", "space-1", "h1", "c1")
+
+        invalidator = CacheInvalidator(cache, graph)
+        invalidated = invalidator.invalidate_for_document("doc-1", "space-1")
+
+        assert "doc-1" in invalidated
+        assert cache.get("doc-1") is None
+
+    def test_invalidation_with_dependents(self):
+        """Test invalidation cascades to dependents."""
+        cache = RenderCache()
+        graph = ReferenceGraph()
+
+        # doc-1 and doc-2 depend on shared
+        graph.add_reference("doc-1", "shared", "space-1")
+        graph.add_reference("doc-2", "shared", "space-1")
+
+        cache.put("doc-1", "space-1", "h1", "c1")
+        cache.put("doc-2", "space-1", "h2", "c2")
+        cache.put("shared", "space-1", "hs", "cs")
+
+        invalidator = CacheInvalidator(cache, graph, transitive=True)
+        invalidated = invalidator.invalidate_for_document("shared", "space-1")
+
+        assert "shared" in invalidated
+        assert "doc-1" in invalidated
+        assert "doc-2" in invalidated
+
+    def test_invalidation_non_transitive(self):
+        """Test non-transitive invalidation."""
+        cache = RenderCache()
+        graph = ReferenceGraph()
+
+        # doc-1 -> shared, doc-2 -> doc-1
+        graph.add_reference("doc-1", "shared", "space-1")
+        graph.add_reference("doc-2", "doc-1", "space-1")
+
+        cache.put("doc-1", "space-1", "h1", "c1")
+        cache.put("doc-2", "space-1", "h2", "c2")
+
+        invalidator = CacheInvalidator(cache, graph, transitive=False)
+        invalidated = invalidator.invalidate_for_document("shared", "space-1")
+
+        # Only direct dependent should be invalidated
+        assert "doc-1" in invalidated
+        assert "doc-2" not in invalidated
+
+    def test_event_subscription(self):
+        """Test that invalidator subscribes to events."""
+        cache = RenderCache()
+        graph = ReferenceGraph()
+        bus = EventBus()
+
+        cache.put("doc-1", "space-1", "h1", "c1")
+
+        # Imported here because only this test needs the event factory
+        from markitect.spaces.events import document_content_changed_event
+
+        invalidator = CacheInvalidator(cache, graph, event_bus=bus)
+        try:
+            # Emit content changed event
+            bus.emit(document_content_changed_event("space-1", "doc-1", "h1", "h2"))
+
+            # Cache should be invalidated
+            assert cache.get("doc-1") is None
+        finally:  # always detach, even when the assertion above fails
+            invalidator.unsubscribe()
+
+    def test_invalidate_all(self):
+        """Test invalidating all entries in a space."""
+        cache = RenderCache()
+        graph = ReferenceGraph()
+
+        cache.put("doc-1", "space-1", "h1", "c1")
+        cache.put("doc-2", "space-1", "h2", "c2")
+
+        invalidator = CacheInvalidator(cache, graph)
+        count = invalidator.invalidate_all("space-1")
+
+        assert count == 2
+        assert cache.get("doc-1") is None
+        assert cache.get("doc-2") is None