feat(spaces): implement Phase 3 Persistent Transclusion Context

Implements persistent transclusion context for Information Spaces:

- ScopedVariables: Variable scope layers (request > document > space)
- SpaceTransclusionContext: Extends TransclusionContext with DB persistence
- CrossSpaceResolver: Resolve references across space boundaries
- ReferenceGraph: Track document dependencies for cache invalidation
- PersistentReferenceGraph: Repository-backed reference tracking
- RenderCache: Cache rendered output with invalidation support
- CacheInvalidator: Event-driven cache invalidation using reference graph

Key features:
- Variable precedence: request overrides document overrides space
- Reference tracking during transclusion processing
- Transitive dependent calculation for cache invalidation
- Event bus integration for automatic invalidation on content changes

47 unit tests covering all components.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 08:36:50 +01:00
parent 0a494b2011
commit 7da77396a9
5 changed files with 1702 additions and 2 deletions

View File

@@ -6,7 +6,40 @@ This package extends the existing TransclusionContext with:
- Cross-space reference resolution
- Reference graph for dependency tracking
- Variable scope layers (space, document, request)
- Event-driven cache invalidation
"""
# Transclusion extensions will be implemented in Phase 3
__all__ = []
from .persistent_context import (
SpaceTransclusionContext,
ScopedVariables,
VariableScope,
CrossSpaceResolver,
)
from .reference_graph import (
ReferenceGraph,
PersistentReferenceGraph,
DependencyNode,
)
from .cache_invalidation import (
RenderCache,
CacheEntry,
CacheInvalidator,
create_invalidation_handler,
)
__all__ = [
# Persistent context
"SpaceTransclusionContext",
"ScopedVariables",
"VariableScope",
"CrossSpaceResolver",
# Reference graph
"ReferenceGraph",
"PersistentReferenceGraph",
"DependencyNode",
# Cache invalidation
"RenderCache",
"CacheEntry",
"CacheInvalidator",
"create_invalidation_handler",
]

View File

@@ -0,0 +1,397 @@
"""
Cache invalidation for Information Spaces.
This module provides event-driven cache invalidation that uses
the reference graph to determine which documents need to be
re-rendered when content changes.
"""
from typing import Set, List, Dict, Any, Optional, Callable
from dataclasses import dataclass, field
import logging
from ..events import (
EventBus,
SpaceEvent,
SpaceEventType,
cache_invalidated_event,
)
from .reference_graph import ReferenceGraph, PersistentReferenceGraph
logger = logging.getLogger(__name__)
@dataclass
class CacheEntry:
    """
    Represents a cached rendering result.

    Attributes:
        document_id: The document ID
        space_id: The space ID
        content_hash: Hash of source content when rendered
        rendered_content: The rendered output
        dependencies: Document IDs this rendering depends on
    """

    document_id: str
    space_id: str
    content_hash: str
    rendered_content: Any
    dependencies: Set[str] = field(default_factory=set)


class RenderCache:
    """
    Cache for rendered document content.

    Entries are keyed by document ID. A secondary index maps each space ID
    to the document IDs cached for that space, so every entry of a space
    can be dropped in one call. The two structures are kept consistent by
    every mutating method.
    """

    def __init__(self):
        """Initialize an empty cache."""
        # document_id -> cached entry
        self._cache: Dict[str, CacheEntry] = {}
        # space_id -> IDs of the documents cached for that space
        self._by_space: Dict[str, Set[str]] = {}

    def _unindex(self, document_id: str, space_id: str) -> None:
        """Remove a document from a space's index, pruning empty buckets."""
        bucket = self._by_space.get(space_id)
        if bucket is None:
            return
        bucket.discard(document_id)
        if not bucket:
            # Do not let empty sets accumulate for spaces with no entries.
            del self._by_space[space_id]

    def get(self, document_id: str) -> Optional[CacheEntry]:
        """
        Get a cache entry.

        Args:
            document_id: The document ID

        Returns:
            CacheEntry if found, None otherwise
        """
        return self._cache.get(document_id)

    def put(
        self,
        document_id: str,
        space_id: str,
        content_hash: str,
        rendered_content: Any,
        dependencies: Optional[Set[str]] = None,
    ) -> CacheEntry:
        """
        Store a cache entry, replacing any existing entry for the document.

        Args:
            document_id: The document ID
            space_id: The space ID
            content_hash: Source content hash
            rendered_content: Rendered output
            dependencies: Document IDs this depends on

        Returns:
            The created CacheEntry
        """
        previous = self._cache.get(document_id)
        if previous is not None and previous.space_id != space_id:
            # The document moved to another space: drop the stale index
            # record, otherwise invalidating the old space would delete
            # the entry that now belongs to the new space.
            self._unindex(document_id, previous.space_id)

        entry = CacheEntry(
            document_id=document_id,
            space_id=space_id,
            content_hash=content_hash,
            rendered_content=rendered_content,
            # Copy so later mutation of the caller's set cannot change
            # the stored entry.
            dependencies=set(dependencies) if dependencies else set(),
        )
        self._cache[document_id] = entry
        self._by_space.setdefault(space_id, set()).add(document_id)
        return entry

    def invalidate(self, document_id: str) -> bool:
        """
        Invalidate a single cache entry.

        Args:
            document_id: The document ID

        Returns:
            True if entry was invalidated, False if not found
        """
        entry = self._cache.pop(document_id, None)
        if entry is None:
            return False
        self._unindex(document_id, entry.space_id)
        return True

    def invalidate_many(self, document_ids: Set[str]) -> int:
        """
        Invalidate multiple cache entries.

        Args:
            document_ids: Set of document IDs to invalidate

        Returns:
            Number of entries invalidated
        """
        return sum(1 for doc_id in document_ids if self.invalidate(doc_id))

    def invalidate_space(self, space_id: str) -> int:
        """
        Invalidate all entries for a space.

        Args:
            space_id: The space ID

        Returns:
            Number of entries invalidated
        """
        doc_ids = self._by_space.pop(space_id, set())
        count = 0
        for doc_id in doc_ids:
            if doc_id in self._cache:
                del self._cache[doc_id]
                count += 1
        return count

    def is_valid(self, document_id: str, content_hash: str) -> bool:
        """
        Check if a cache entry is still valid.

        Args:
            document_id: The document ID
            content_hash: Current content hash

        Returns:
            True if cache entry exists and matches hash
        """
        entry = self._cache.get(document_id)
        return entry is not None and entry.content_hash == content_hash

    def get_cached_documents(self, space_id: str) -> Set[str]:
        """
        Get all cached document IDs for a space.

        Args:
            space_id: The space ID

        Returns:
            A new set of cached document IDs (safe for the caller to mutate)
        """
        return set(self._by_space.get(space_id, ()))

    def clear(self) -> int:
        """
        Clear all cache entries.

        Returns:
            Number of entries cleared
        """
        count = len(self._cache)
        self._cache.clear()
        self._by_space.clear()
        return count
class CacheInvalidator:
    """
    Coordinates cache invalidation in response to space events.

    When an event bus is supplied, the invalidator subscribes to document
    content changes, document removals and space deletions, and consults
    the reference graph to find which cached renderings must be dropped.
    """

    def __init__(
        self,
        cache: RenderCache,
        reference_graph: ReferenceGraph,
        event_bus: Optional[EventBus] = None,
        transitive: bool = True,
    ):
        """
        Set up the invalidator and, if a bus is given, subscribe to it.

        Args:
            cache: The render cache whose entries get invalidated
            reference_graph: Graph used to look up dependent documents
            event_bus: Optional bus to listen on and to emit events to
            transitive: If True, indirect dependents are invalidated too
        """
        self._cache = cache
        self._reference_graph = reference_graph
        self._event_bus = event_bus
        self._transitive = transitive
        # IDs returned by the bus, kept so unsubscribe() can undo them.
        self._handler_ids: List[str] = []

        if event_bus:
            self._subscribe_to_events()

    def _subscribe_to_events(self) -> None:
        """Register one handler per event type that affects the cache."""
        if not self._event_bus:
            return

        subscriptions = (
            # Document content changes trigger invalidation
            (SpaceEventType.DOCUMENT_CONTENT_CHANGED, self._on_content_changed),
            # Document removal invalidates
            (SpaceEventType.DOCUMENT_REMOVED, self._on_document_removed),
            # Space deletion invalidates all
            (SpaceEventType.SPACE_DELETED, self._on_space_deleted),
        )
        for event_type, callback in subscriptions:
            handler_id = self._event_bus.subscribe(event_type, callback)
            self._handler_ids.append(handler_id)

    def unsubscribe(self) -> None:
        """Remove every subscription this invalidator registered."""
        bus = self._event_bus
        if not bus:
            return
        for handler_id in self._handler_ids:
            bus.unsubscribe_by_id(handler_id)
        self._handler_ids.clear()

    def _on_content_changed(self, event: SpaceEvent) -> None:
        """React to a content change by invalidating the document's cache."""
        document_id = event.payload.get("document_id")
        if document_id:
            invalidated = self.invalidate_for_document(document_id, event.space_id)
            logger.debug(
                f"Content changed for {document_id}, invalidated {len(invalidated)} documents"
            )

    def _on_document_removed(self, event: SpaceEvent) -> None:
        """React to a removal: invalidate, then forget the document."""
        document_id = event.payload.get("document_id")
        if document_id:
            # Dependents must be looked up before the node leaves the graph.
            invalidated = self.invalidate_for_document(document_id, event.space_id)
            self._reference_graph.remove_document(document_id)
            logger.debug(
                f"Document {document_id} removed, invalidated {len(invalidated)} documents"
            )

    def _on_space_deleted(self, event: SpaceEvent) -> None:
        """React to a space deletion by dropping its cache and graph data."""
        space_id = event.space_id
        count = self._cache.invalidate_space(space_id)
        self._reference_graph.clear_space(space_id)
        logger.debug(f"Space {space_id} deleted, invalidated {count} documents")

    def invalidate_for_document(
        self,
        document_id: str,
        space_id: str,
    ) -> Set[str]:
        """
        Invalidate the cache for a document and the documents depending on it.

        Args:
            document_id: The changed document
            space_id: The space ID reported in the emitted event

        Returns:
            Set of document IDs selected for invalidation (the document
            itself plus its dependents), whether or not they were cached
        """
        graph = self._reference_graph
        # Direct dependents only, unless transitive invalidation is enabled.
        find_dependents = (
            graph.get_transitive_dependents
            if self._transitive
            else graph.get_dependents
        )

        to_invalidate = {document_id}
        to_invalidate.update(find_dependents(document_id))
        self._cache.invalidate_many(to_invalidate)

        # Tell other listeners what was dropped, when a bus is available.
        if self._event_bus and to_invalidate:
            self._event_bus.emit(
                cache_invalidated_event(
                    space_id=space_id,
                    document_ids=list(to_invalidate),
                    reason="content_changed",
                )
            )
        return to_invalidate

    def invalidate_all(self, space_id: str) -> int:
        """
        Invalidate every cache entry of a space.

        Args:
            space_id: The space ID

        Returns:
            Number of entries invalidated
        """
        # Snapshot the IDs first; invalidate_space() discards the index.
        cached_docs = self._cache.get_cached_documents(space_id)
        count = self._cache.invalidate_space(space_id)

        if self._event_bus and count > 0:
            self._event_bus.emit(
                cache_invalidated_event(
                    space_id=space_id,
                    document_ids=list(cached_docs),
                    reason="full_invalidation",
                )
            )
        return count
def create_invalidation_handler(
    cache: RenderCache,
    reference_graph: ReferenceGraph,
) -> Callable[[SpaceEvent], None]:
    """
    Build a standalone event handler that invalidates cached renderings.

    A lightweight alternative to CacheInvalidator: the returned callable
    only touches the cache and never emits events itself.

    Args:
        cache: The render cache
        reference_graph: The reference graph

    Returns:
        Event handler function
    """

    def handler(event: SpaceEvent) -> None:
        document_id = event.payload.get("document_id")
        if document_id:
            # The changed document plus everything that includes it,
            # directly or indirectly.
            stale = {document_id}
            stale.update(reference_graph.get_transitive_dependents(document_id))
            cache.invalidate_many(stale)

    return handler

View File

@@ -0,0 +1,369 @@
"""
Persistent transclusion context for Information Spaces.
This module extends the core TransclusionContext with database-backed
variable storage and space integration.
"""
from pathlib import Path
from typing import Dict, Any, Optional, List, Set
from dataclasses import dataclass, field
from markitect.packaging.transclusion.context import TransclusionContext
from ..models import SpaceVariable
from ..repositories.interfaces import IVariableRepository
class VariableScope:
    """Defines variable scope levels in order of precedence (highest first)."""

    REQUEST = "request"  # Temporary, per-request variables
    DOCUMENT = "document"  # Document-level variables
    SPACE = "space"  # Space-level variables (persisted)


@dataclass
class ScopedVariables:
    """
    Manages variables across multiple scope layers.

    Variables are resolved in order: request > document > space
    This allows local overrides of space-level defaults.
    """

    space_vars: Dict[str, Any] = field(default_factory=dict)
    document_vars: Dict[str, Any] = field(default_factory=dict)
    request_vars: Dict[str, Any] = field(default_factory=dict)

    def _layer(self, scope: str) -> Dict[str, Any]:
        """
        Return the dictionary backing a scope.

        The attribute is looked up on every call because callers may
        rebind the layer dictionaries after construction.

        Raises:
            ValueError: If scope is not one of the VariableScope values
        """
        if scope == VariableScope.REQUEST:
            return self.request_vars
        if scope == VariableScope.DOCUMENT:
            return self.document_vars
        if scope == VariableScope.SPACE:
            return self.space_vars
        raise ValueError(f"Unknown variable scope: {scope!r}")

    def get(self, name: str, default: Any = None) -> Any:
        """
        Get a variable, checking scopes in order of precedence.

        A variable explicitly stored as None in a higher scope still wins
        over lower scopes and over the default.

        Args:
            name: Variable name
            default: Default if not found in any scope

        Returns:
            Variable value from highest precedence scope, or default
        """
        for layer in (self.request_vars, self.document_vars, self.space_vars):
            if name in layer:
                return layer[name]
        return default

    def set(self, name: str, value: Any, scope: str = VariableScope.REQUEST) -> None:
        """
        Set a variable in the specified scope.

        Args:
            name: Variable name
            value: Variable value
            scope: Target scope (request, document, or space)

        Raises:
            ValueError: If scope is not a known scope. Previously such a
                call silently discarded the value.
        """
        self._layer(scope)[name] = value

    def get_all(self) -> Dict[str, Any]:
        """
        Get all variables merged with proper precedence.

        Returns:
            New dictionary with all variables, higher scopes overriding lower
        """
        # Later unpacking wins, so list the layers from lowest to highest.
        return {**self.space_vars, **self.document_vars, **self.request_vars}

    def clear_scope(self, scope: str) -> None:
        """
        Clear all variables in a scope.

        Raises:
            ValueError: If scope is not a known scope
        """
        self._layer(scope).clear()
class SpaceTransclusionContext(TransclusionContext):
    """
    Transclusion context integrated with Information Spaces.

    Extends the base TransclusionContext with:
    - Space-aware variable resolution with scope layers
    - Reference tracking for cache invalidation
    - Optional persistence of space-level variables
    """

    def __init__(
        self,
        space_id: str,
        base_path: Optional[Path] = None,
        variables: Optional[Dict[str, Any]] = None,
        max_depth: int = 10,
        variable_repo: Optional[IVariableRepository] = None,
    ):
        """
        Initialize a space-aware transclusion context.

        Args:
            space_id: The space ID this context belongs to
            base_path: Base path for relative file resolution
            variables: Initial request-level variables
            max_depth: Maximum inclusion depth
            variable_repo: Optional repository for persisting space variables
        """
        # Initialize scoped vars BEFORE super().__init__() because
        # the base class sets self.variables which triggers our property setter
        self._scoped_vars = ScopedVariables()
        self.space_id = space_id
        self._variable_repo = variable_repo

        # Track references during processing
        self._current_document_id: Optional[str] = None
        self._references: List[tuple] = []  # (source_doc_id, target_doc_id)

        # Now call parent init (which may set variables via property)
        super().__init__(base_path=base_path, variables={}, max_depth=max_depth)

        # Load space variables from repository if available
        if variable_repo:
            self._load_space_variables()

        # Request variables go in last so they layer over repository values
        if variables:
            for name, value in variables.items():
                self._scoped_vars.set(name, value, VariableScope.REQUEST)

    def _load_space_variables(self) -> None:
        """Load space- and document-scope variables from the repository."""
        if not self._variable_repo:
            return

        space_vars = self._variable_repo.list_variables(self.space_id, scope="space")
        for var in space_vars:
            self._scoped_vars.set(var.name, var.value, VariableScope.SPACE)

        # NOTE(review): document-scope variables are fetched for the whole
        # space, not for one document — confirm this is intended.
        doc_vars = self._variable_repo.list_variables(self.space_id, scope="document")
        for var in doc_vars:
            self._scoped_vars.set(var.name, var.value, VariableScope.DOCUMENT)

    def set_current_document(self, document_id: str) -> None:
        """
        Set the current document being processed.

        Args:
            document_id: The document ID
        """
        self._current_document_id = document_id

    def get_current_document(self) -> Optional[str]:
        """Get the current document being processed."""
        return self._current_document_id

    def track_reference(self, target_doc_id: str) -> None:
        """
        Track a reference from current document to target.

        Nothing is recorded when no current document has been set.

        Args:
            target_doc_id: The document being referenced
        """
        if self._current_document_id:
            self._references.append((self._current_document_id, target_doc_id))

    def get_tracked_references(self) -> List[tuple]:
        """
        Get all tracked references.

        Returns:
            New list of (source_doc_id, target_doc_id) tuples
        """
        return list(self._references)

    def clear_tracked_references(self) -> None:
        """Clear all tracked references."""
        self._references.clear()

    # Override variable methods to use scoped storage

    def set_variable(self, name: str, value: Any, scope: str = VariableScope.REQUEST) -> None:
        """
        Set a variable in the specified scope.

        Only space-scope variables are written to the repository.

        Args:
            name: Variable name
            value: Variable value
            scope: Variable scope (request, document, or space)
        """
        self._scoped_vars.set(name, value, scope)

        # Persist space-level variables if repository available
        if scope == VariableScope.SPACE and self._variable_repo:
            var = SpaceVariable(
                space_id=self.space_id,
                name=name,
                value=value,
                scope=scope,
            )
            self._variable_repo.set_variable(var)

    def get_variable(self, name: str, default: Any = None) -> Any:
        """
        Get a variable from the scoped storage.

        Args:
            name: Variable name
            default: Default value if not found

        Returns:
            Variable value or default
        """
        return self._scoped_vars.get(name, default)

    def substitute_variables(self, text: str) -> str:
        """
        Substitute variables in text using scoped variable resolution.

        Placeholders whose variable is missing, or resolves to None, are
        left in the text unchanged.

        Args:
            text: Text containing {{variable}} references

        Returns:
            Text with variables substituted
        """
        import re

        def replace_var(match):
            var_name = match.group(1).strip()
            value = self._scoped_vars.get(var_name)
            return str(value) if value is not None else match.group(0)

        return re.sub(r'\{\{([^}]+)\}\}', replace_var, text)

    @property
    def variables(self) -> Dict[str, Any]:
        """Get all variables merged with proper precedence."""
        return self._scoped_vars.get_all()

    @variables.setter
    def variables(self, value: Dict[str, Any]) -> None:
        """Set request-level variables."""
        self._scoped_vars.request_vars = value

    def create_child_context(
        self, new_base_path: Optional[Path] = None
    ) -> "SpaceTransclusionContext":
        """
        Create a child context for nested processing.

        The child receives copies of this context's variables and
        processing state, and shares the reference list so references
        tracked by the child are visible to the parent.

        Args:
            new_base_path: New base path for the child context

        Returns:
            New SpaceTransclusionContext with inherited state
        """
        # Construct the child without a repository: passing it to the
        # constructor would reload every variable from storage, only for
        # the result to be overwritten by the copies below.
        child = SpaceTransclusionContext(
            space_id=self.space_id,
            base_path=new_base_path or self.base_path,
            max_depth=self.max_depth,
        )
        # Attach the repository afterwards so space-scope writes made
        # through the child are still persisted.
        child._variable_repo = self._variable_repo

        # Copy scoped variables
        child._scoped_vars.space_vars = self._scoped_vars.space_vars.copy()
        child._scoped_vars.document_vars = self._scoped_vars.document_vars.copy()
        child._scoped_vars.request_vars = self._scoped_vars.request_vars.copy()

        # Copy processing state
        child.current_depth = self.current_depth
        child.inclusion_stack = self.inclusion_stack.copy()
        child.processed_files = self.processed_files.copy()

        # Share reference tracking
        child._current_document_id = self._current_document_id
        child._references = self._references  # Shared list

        return child
class CrossSpaceResolver:
    """
    Resolves references across space boundaries.

    Enables transclusion from one space to reference content in another space.
    """

    def __init__(self, contexts: Dict[str, "SpaceTransclusionContext"]):
        """
        Initialize the cross-space resolver.

        Args:
            contexts: Dictionary mapping space_id to SpaceTransclusionContext.
                The dictionary is stored as-is, not copied.
        """
        self._contexts = contexts

    def add_context(self, space_id: str, context: "SpaceTransclusionContext") -> None:
        """Add (or replace) the context registered for a space."""
        self._contexts[space_id] = context

    def get_context(self, space_id: str) -> Optional["SpaceTransclusionContext"]:
        """Get the context for a space, or None if none is registered."""
        return self._contexts.get(space_id)

    def resolve_variable(
        self,
        space_id: str,
        var_name: str,
        default: Any = None,
    ) -> Any:
        """
        Resolve a variable from a specific space.

        Args:
            space_id: The space to look up
            var_name: Variable name
            default: Default value if the space or the variable is unknown

        Returns:
            Variable value or default
        """
        context = self._contexts.get(space_id)
        if context:
            return context.get_variable(var_name, default)
        return default

    def resolve_cross_space_reference(
        self,
        reference: str,
        current_space_id: str,
    ) -> Optional[tuple]:
        """
        Parse and resolve a cross-space reference.

        Reference format: "space:other-space/path/to/doc.md"
        or just "path/to/doc.md" for current space.

        Args:
            reference: The reference string
            current_space_id: Current space ID for relative references

        Returns:
            Tuple of (space_id, path). For cross-space references the path
            is returned with a leading "/"; same-space references are
            returned unchanged. None if a "space:" reference lacks a
            space name or a path.
        """
        prefix = "space:"
        if not reference.startswith(prefix):
            # Same-space reference
            return (current_space_id, reference)

        # Cross-space reference: split "<space>/<path>" at the first slash.
        space_name, separator, path = reference[len(prefix):].partition("/")
        if not separator or not space_name or not path:
            # "space:name", "space:/path" and "space:name/" are all invalid.
            return None
        return (space_name, "/" + path)

View File

@@ -0,0 +1,336 @@
"""
Reference graph for transclusion dependency tracking.
This module provides a graph-based system for tracking which documents
reference which other documents, enabling efficient cache invalidation.
"""
from collections import defaultdict
from typing import Dict, List, Set, Optional
from dataclasses import dataclass, field
from ..models import TransclusionReference
from ..repositories.interfaces import IReferenceRepository
@dataclass
class DependencyNode:
    """
    A node in the dependency graph representing a document.

    Tracks both outgoing references (what this document includes)
    and incoming references (what includes this document).
    """

    document_id: str
    space_id: str
    # Documents this document references (includes)
    references: Set[str] = field(default_factory=set)
    # Documents that reference (include) this document
    dependents: Set[str] = field(default_factory=set)


class ReferenceGraph:
    """
    In-memory graph of document dependencies.

    Used for efficient cache invalidation by tracking which documents
    depend on which other documents.
    """

    def __init__(self):
        """Initialize an empty reference graph."""
        # Map of document_id -> DependencyNode
        self._nodes: Dict[str, DependencyNode] = {}
        # Map of space_id -> set of document_ids in that space
        self._spaces: Dict[str, Set[str]] = defaultdict(set)

    def _get_or_create_node(self, document_id: str, space_id: str) -> DependencyNode:
        """
        Get or create a node for a document.

        An existing node keeps the space it was first registered with;
        space_id is only used when the node is created.
        """
        node = self._nodes.get(document_id)
        if node is None:
            node = DependencyNode(document_id=document_id, space_id=space_id)
            self._nodes[document_id] = node
            self._spaces[space_id].add(document_id)
        return node

    def add_reference(
        self,
        source_doc_id: str,
        target_doc_id: str,
        space_id: str,
    ) -> None:
        """
        Add a reference from source to target document.

        Both endpoints are registered in the graph if not yet known.

        Args:
            source_doc_id: The document doing the including
            target_doc_id: The document being included
            space_id: The space ID assigned to nodes created by this call
        """
        source_node = self._get_or_create_node(source_doc_id, space_id)
        target_node = self._get_or_create_node(target_doc_id, space_id)
        source_node.references.add(target_doc_id)
        target_node.dependents.add(source_doc_id)

    def remove_reference(
        self,
        source_doc_id: str,
        target_doc_id: str,
    ) -> None:
        """
        Remove a reference from source to target.

        Unknown documents are ignored; the nodes themselves are kept.

        Args:
            source_doc_id: The source document
            target_doc_id: The target document
        """
        if source_doc_id in self._nodes:
            self._nodes[source_doc_id].references.discard(target_doc_id)
        if target_doc_id in self._nodes:
            self._nodes[target_doc_id].dependents.discard(source_doc_id)

    def clear_references_from(self, source_doc_id: str) -> List[str]:
        """
        Clear all references from a source document.

        Args:
            source_doc_id: The source document

        Returns:
            List of target document IDs that were referenced
        """
        node = self._nodes.get(source_doc_id)
        if node is None:
            return []

        targets = list(node.references)
        # Remove the source from every target's dependents set
        for target_id in targets:
            if target_id in self._nodes:
                self._nodes[target_id].dependents.discard(source_doc_id)
        node.references.clear()
        return targets

    def get_references(self, document_id: str) -> Set[str]:
        """
        Get all documents referenced by a document.

        Args:
            document_id: The document ID

        Returns:
            New set of referenced document IDs (empty if unknown)
        """
        node = self._nodes.get(document_id)
        return set() if node is None else node.references.copy()

    def get_dependents(self, document_id: str) -> Set[str]:
        """
        Get all documents that depend on (reference) a document.

        Args:
            document_id: The document ID

        Returns:
            New set of dependent document IDs (empty if unknown)
        """
        node = self._nodes.get(document_id)
        return set() if node is None else node.dependents.copy()

    def get_transitive_dependents(self, document_id: str) -> Set[str]:
        """
        Get all documents that directly or indirectly depend on a document.

        Performs a breadth-first traversal of the dependency graph, one
        level at a time. Cycles are handled: each document is visited
        once, and the starting document is never part of the result.

        Args:
            document_id: The document ID

        Returns:
            Set of all transitive dependent document IDs
        """
        result: Set[str] = set()
        visited = {document_id}
        frontier = self.get_dependents(document_id)

        # Expanding whole levels avoids list.pop(0), which made the
        # traversal quadratic on long dependency chains.
        while frontier:
            next_frontier: Set[str] = set()
            for current in frontier:
                if current in visited:
                    continue
                visited.add(current)
                result.add(current)
                next_frontier.update(self.get_dependents(current))
            frontier = next_frontier - visited
        return result

    def get_documents_in_space(self, space_id: str) -> Set[str]:
        """
        Get all document IDs tracked in a space.

        Args:
            space_id: The space ID

        Returns:
            New set of document IDs
        """
        # .get() so that merely querying never creates a defaultdict entry.
        return self._spaces.get(space_id, set()).copy()

    def remove_document(self, document_id: str) -> None:
        """
        Remove a document and all its references from the graph.

        Args:
            document_id: The document ID to remove
        """
        node = self._nodes.get(document_id)
        if node is None:
            return

        # Remove this document from all its targets' dependents sets
        for target_id in node.references:
            if target_id in self._nodes:
                self._nodes[target_id].dependents.discard(document_id)
        # Remove this document from all its dependents' references sets
        for dependent_id in node.dependents:
            if dependent_id in self._nodes:
                self._nodes[dependent_id].references.discard(document_id)

        # Remove from space tracking
        self._spaces[node.space_id].discard(document_id)
        # Delete the node
        del self._nodes[document_id]

    def clear_space(self, space_id: str) -> None:
        """
        Remove every document tracked for a space from the graph.

        Args:
            space_id: The space ID
        """
        # Snapshot the IDs: remove_document() mutates the underlying set.
        for doc_id in list(self._spaces.get(space_id, set())):
            self.remove_document(doc_id)
class PersistentReferenceGraph(ReferenceGraph):
    """
    Reference graph backed by persistent storage.

    Extends ReferenceGraph so that added and cleared references are also
    written to a repository. Loading existing references back from the
    repository is not implemented yet, so the in-memory graph is built
    incrementally during document processing.
    """

    def __init__(
        self,
        space_id: str,
        reference_repo: IReferenceRepository,
        load_on_init: bool = True,
    ):
        """
        Initialize a persistent reference graph.

        Args:
            space_id: The space ID
            reference_repo: Repository for persisting references
            load_on_init: Whether to load existing references on init
                (currently has no effect, see _load_from_repository)
        """
        super().__init__()
        self.space_id = space_id
        self._reference_repo = reference_repo

        if load_on_init:
            self._load_from_repository()

    def _load_from_repository(self) -> None:
        """
        Load all references from the repository.

        Currently a no-op: the repository offers no way to list every
        reference of a space, so nothing can be loaded here.
        """
        # Repository doesn't have a list_all method, would need to enhance

    def add_reference(
        self,
        source_doc_id: str,
        target_doc_id: str,
        space_id: Optional[str] = None,
    ) -> None:
        """
        Add a reference and persist it.

        Args:
            source_doc_id: Source document ID
            target_doc_id: Target document ID
            space_id: Optional space ID override; defaults to this
                graph's own space
        """
        space = space_id or self.space_id

        # Update in-memory graph
        super().add_reference(source_doc_id, target_doc_id, space)

        # Persist to repository
        ref = TransclusionReference(
            source_doc_id=source_doc_id,
            target_doc_id=target_doc_id,
            space_id=space,
        )
        self._reference_repo.add_reference(ref)

    def clear_references_from(self, source_doc_id: str) -> List[str]:
        """
        Clear references from source and persist.

        Args:
            source_doc_id: Source document ID

        Returns:
            List of cleared target document IDs (from the in-memory graph)
        """
        # Clear from in-memory graph
        targets = super().clear_references_from(source_doc_id)

        # Clear from repository
        self._reference_repo.clear_references_from(source_doc_id, self.space_id)
        return targets

    def get_dependents_from_repo(self, document_id: str) -> List[str]:
        """
        Get dependents directly from repository.

        Useful when graph may not be fully loaded.

        Args:
            document_id: The document ID

        Returns:
            List of dependent document IDs
        """
        return self._reference_repo.get_dependents(document_id, self.space_id)

    def sync_with_repository(self) -> None:
        """
        Sync in-memory graph with repository.

        Currently a no-op. A real sync needs a repository method that
        lists all references of a space, which does not exist yet.
        """
        # Deliberately do NOT clear the in-memory graph here. Without a
        # way to reload from the repository, clearing would discard all
        # known dependencies and later content changes would no longer
        # invalidate dependent documents.
        # Note: This would need a method to list all references in space
        # For now, the graph is built incrementally during document processing

View File

@@ -0,0 +1,565 @@
"""
Unit tests for transclusion context and cache invalidation.
Tests the Phase 3 components:
- SpaceTransclusionContext with scoped variables
- ReferenceGraph for dependency tracking
- RenderCache and CacheInvalidator
"""
import pytest
from pathlib import Path
from markitect.spaces.transclusion import (
SpaceTransclusionContext,
ScopedVariables,
VariableScope,
CrossSpaceResolver,
ReferenceGraph,
DependencyNode,
RenderCache,
CacheEntry,
CacheInvalidator,
)
from markitect.spaces.events import EventBus, SpaceEventType
class TestScopedVariables:
    """Exercises ScopedVariables lookup, assignment, merging and clearing."""

    def test_empty_variables(self):
        """A fresh instance holds nothing and honours the caller's default."""
        layers = ScopedVariables()

        assert layers.get("foo") is None
        assert layers.get("foo", "default") == "default"

    def test_set_and_get_request_scope(self):
        """A value stored in the request scope can be read back."""
        layers = ScopedVariables()
        layers.set("key", "request_value", VariableScope.REQUEST)

        assert layers.get("key") == "request_value"

    def test_set_and_get_document_scope(self):
        """A value stored in the document scope can be read back."""
        layers = ScopedVariables()
        layers.set("key", "doc_value", VariableScope.DOCUMENT)

        assert layers.get("key") == "doc_value"

    def test_set_and_get_space_scope(self):
        """A value stored in the space scope can be read back."""
        layers = ScopedVariables()
        layers.set("key", "space_value", VariableScope.SPACE)

        assert layers.get("key") == "space_value"

    def test_scope_precedence(self):
        """With the same name in every scope, the request value is returned."""
        layers = ScopedVariables()
        assignments = (
            ("space_value", VariableScope.SPACE),
            ("doc_value", VariableScope.DOCUMENT),
            ("request_value", VariableScope.REQUEST),
        )
        for value, scope in assignments:
            layers.set("key", value, scope)

        # Request scope wins
        assert layers.get("key") == "request_value"

    def test_scope_fallback(self):
        """Names present only in a lower scope are still resolved."""
        layers = ScopedVariables()
        layers.set("space_only", "from_space", VariableScope.SPACE)
        layers.set("doc_only", "from_doc", VariableScope.DOCUMENT)

        assert layers.get("space_only") == "from_space"
        assert layers.get("doc_only") == "from_doc"

    def test_get_all(self):
        """The merged view applies request-over-document-over-space."""
        layers = ScopedVariables()
        assignments = (
            ("a", "space_a", VariableScope.SPACE),
            ("b", "doc_b", VariableScope.DOCUMENT),
            ("c", "req_c", VariableScope.REQUEST),
            ("a", "req_a", VariableScope.REQUEST),  # Override
        )
        for name, value, scope in assignments:
            layers.set(name, value, scope)

        all_vars = layers.get_all()

        assert all_vars["a"] == "req_a"  # Request wins
        assert all_vars["b"] == "doc_b"
        assert all_vars["c"] == "req_c"

    def test_clear_scope(self):
        """Clearing the request scope exposes the space-level value again."""
        layers = ScopedVariables()
        layers.set("key", "space_value", VariableScope.SPACE)
        layers.set("key", "request_value", VariableScope.REQUEST)

        layers.clear_scope(VariableScope.REQUEST)

        # Now should fall back to space
        assert layers.get("key") == "space_value"
class TestSpaceTransclusionContext:
    """Behavioural checks for SpaceTransclusionContext."""

    def test_basic_creation(self):
        """Constructor arguments are exposed as attributes."""
        root = Path("/test")
        context = SpaceTransclusionContext(space_id="space-1", base_path=root)

        assert context.space_id == "space-1"
        assert context.base_path == Path("/test")

    def test_initial_variables(self):
        """Variables passed to the constructor are readable via get_variable()."""
        context = SpaceTransclusionContext(
            space_id="space-1",
            variables={"version": "1.0", "api_url": "https://api.example.com"},
        )

        version = context.get_variable("version")
        api_url = context.get_variable("api_url")
        assert version == "1.0"
        assert api_url == "https://api.example.com"

    def test_set_variable_with_scope(self):
        """Variables set in SPACE and REQUEST scope are both readable."""
        context = SpaceTransclusionContext(space_id="space-1")
        scoped_values = (
            ("global", "g", VariableScope.SPACE),
            ("local", "l", VariableScope.REQUEST),
        )
        for name, value, scope in scoped_values:
            context.set_variable(name, value, scope)

        for name, value, _scope in scoped_values:
            assert context.get_variable(name) == value

    def test_variable_substitution(self):
        """``{{name}}`` placeholders are replaced by the matching variables."""
        template = "Hello {{name}}, welcome to v{{version}}"
        context = SpaceTransclusionContext(
            space_id="space-1",
            variables={"name": "John", "version": "2.0"},
        )

        rendered = context.substitute_variables(template)

        assert rendered == "Hello John, welcome to v2.0"

    def test_variable_substitution_missing(self):
        """A placeholder without a matching variable is left untouched."""
        template = "Hello {{missing}}"
        context = SpaceTransclusionContext(space_id="space-1")

        rendered = context.substitute_variables(template)

        assert rendered == "Hello {{missing}}"

    def test_reference_tracking(self):
        """Tracked references are reported as (current document, target) pairs."""
        context = SpaceTransclusionContext(space_id="space-1")
        context.set_current_document("doc-1")
        targets = ("component-a", "component-b")
        for target in targets:
            context.track_reference(target)

        tracked = context.get_tracked_references()

        assert len(tracked) == 2
        for target in targets:
            assert ("doc-1", target) in tracked

    def test_clear_tracked_references(self):
        """clear_tracked_references() empties the tracked set."""
        context = SpaceTransclusionContext(space_id="space-1")
        context.set_current_document("doc-1")
        context.track_reference("target")

        context.clear_tracked_references()

        remaining = context.get_tracked_references()
        assert len(remaining) == 0

    def test_create_child_context(self):
        """A child keeps the space id and variables but takes the new base path."""
        parent = SpaceTransclusionContext(
            space_id="space-1",
            variables={"inherited": "value"},
        )
        parent.set_variable("space_var", "sv", VariableScope.SPACE)

        child_root = Path("/child")
        child = parent.create_child_context(new_base_path=child_root)

        assert child.space_id == "space-1"
        assert child.base_path == Path("/child")
        # Both constructor-supplied and explicitly set variables are visible.
        for name, value in (("inherited", "value"), ("space_var", "sv")):
            assert child.get_variable(name) == value

    def test_child_context_shares_references(self):
        """A reference tracked through a child is visible from the parent."""
        parent = SpaceTransclusionContext(space_id="space-1")
        parent.set_current_document("doc-1")
        child = parent.create_child_context()

        child.track_reference("from-child")

        seen_by_parent = parent.get_tracked_references()
        assert ("doc-1", "from-child") in seen_by_parent

    def test_variables_property(self):
        """The ``variables`` property exposes values from multiple scopes."""
        context = SpaceTransclusionContext(space_id="space-1")
        context.set_variable("a", "1", VariableScope.SPACE)
        context.set_variable("b", "2", VariableScope.REQUEST)

        merged = context.variables

        for name, value in (("a", "1"), ("b", "2")):
            assert merged[name] == value
class TestCrossSpaceResolver:
    """Behavioural checks for CrossSpaceResolver."""

    def test_add_and_get_context(self):
        """A registered context is returned by identity; unknown ids give None."""
        resolver = CrossSpaceResolver({})
        registered = SpaceTransclusionContext(space_id="space-1")

        resolver.add_context("space-1", registered)

        known = resolver.get_context("space-1")
        assert known is registered
        unknown = resolver.get_context("space-2")
        assert unknown is None

    def test_resolve_variable_from_space(self):
        """Each space resolves its own value; an unknown space yields None."""
        contexts = {
            space_id: SpaceTransclusionContext(
                space_id=space_id,
                variables={"api_key": api_key},
            )
            for space_id, api_key in (("space-1", "key-1"), ("space-2", "key-2"))
        }
        resolver = CrossSpaceResolver(contexts)

        expectations = (
            ("space-1", "key-1"),
            ("space-2", "key-2"),
        )
        for space_id, api_key in expectations:
            assert resolver.resolve_variable(space_id, "api_key") == api_key
        # "space-3" was never registered with the resolver.
        assert resolver.resolve_variable("space-3", "api_key") is None

    def test_resolve_cross_space_reference(self):
        """References resolve to (space id, path), with or without a prefix."""
        resolver = CrossSpaceResolver({})
        cases = (
            # "space:<id>" prefix selects another space.
            ("space:other-space/docs/intro.md", ("other-space", "/docs/intro.md")),
            # No prefix: the current space is used.
            ("/docs/intro.md", ("current-space", "/docs/intro.md")),
        )

        for reference, expected in cases:
            resolved = resolver.resolve_cross_space_reference(
                reference, "current-space"
            )
            assert resolved == expected
class TestReferenceGraph:
    """Tests for ReferenceGraph.

    Throughout these tests ``add_reference(source, target, space)`` records
    that ``source`` references ``target``; ``source`` is then reported as a
    dependent of ``target``.
    """

    def test_empty_graph(self):
        """An empty graph reports no references and no dependents."""
        graph = ReferenceGraph()

        assert graph.get_references("doc-1") == set()
        assert graph.get_dependents("doc-1") == set()

    def test_add_reference(self):
        """Adding a reference updates both the forward and reverse lookups."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "component-a", "space-1")

        assert "component-a" in graph.get_references("doc-1")
        assert "doc-1" in graph.get_dependents("component-a")

    def test_multiple_references(self):
        """One document may reference several targets."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "comp-a", "space-1")
        graph.add_reference("doc-1", "comp-b", "space-1")

        refs = graph.get_references("doc-1")
        assert refs == {"comp-a", "comp-b"}

    def test_multiple_dependents(self):
        """Several documents may depend on the same target."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "shared", "space-1")
        graph.add_reference("doc-2", "shared", "space-1")
        graph.add_reference("doc-3", "shared", "space-1")

        deps = graph.get_dependents("shared")
        assert deps == {"doc-1", "doc-2", "doc-3"}

    def test_remove_reference(self):
        """Removing a reference clears both directions of the edge."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "target", "space-1")
        graph.remove_reference("doc-1", "target")

        assert graph.get_references("doc-1") == set()
        assert graph.get_dependents("target") == set()

    def test_clear_references_from(self):
        """Clearing a document's references returns the removed targets."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "a", "space-1")
        graph.add_reference("doc-1", "b", "space-1")
        graph.add_reference("doc-1", "c", "space-1")

        targets = graph.clear_references_from("doc-1")

        # Compared as a set: only membership of the returned targets is checked.
        assert set(targets) == {"a", "b", "c"}
        assert graph.get_references("doc-1") == set()

    def test_transitive_dependents(self):
        """Transitive dependents follow chains of references."""
        graph = ReferenceGraph()
        # doc-1 -> shared
        # doc-2 -> doc-1
        # doc-3 -> doc-2
        graph.add_reference("doc-1", "shared", "space-1")
        graph.add_reference("doc-2", "doc-1", "space-1")
        graph.add_reference("doc-3", "doc-2", "space-1")

        # Transitive dependents of shared
        deps = graph.get_transitive_dependents("shared")
        assert deps == {"doc-1", "doc-2", "doc-3"}

    def test_transitive_dependents_with_cycle(self):
        """Transitive dependents terminate on a cycle and still traverse it."""
        graph = ReferenceGraph()
        # Create a cycle: a -> b -> c -> a
        graph.add_reference("a", "b", "space-1")
        graph.add_reference("b", "c", "space-1")
        graph.add_reference("c", "a", "space-1")

        # Should not infinite loop
        deps = graph.get_transitive_dependents("b")

        # "a" references "b" directly.
        assert "a" in deps
        # "c" references "a", so it depends on "b" transitively. Checking it
        # proves the traversal walked the cycle rather than stopping after the
        # first hop. Whether "b" itself is included is left unspecified here.
        assert "c" in deps

    def test_get_documents_in_space(self):
        """Sources and targets of a space's references are both listed."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "doc-2", "space-1")
        graph.add_reference("doc-3", "doc-4", "space-1")
        graph.add_reference("other", "doc", "space-2")

        space1_docs = graph.get_documents_in_space("space-1")
        assert space1_docs == {"doc-1", "doc-2", "doc-3", "doc-4"}

    def test_remove_document(self):
        """Removing a document drops its references but not those of others."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "shared", "space-1")
        graph.add_reference("doc-2", "shared", "space-1")

        graph.remove_document("doc-1")

        # doc-1's references should be gone
        assert "doc-1" not in graph.get_dependents("shared")
        # doc-2's references should remain
        assert "doc-2" in graph.get_dependents("shared")

    def test_clear_space(self):
        """Clearing one space leaves the documents of other spaces in place."""
        graph = ReferenceGraph()
        graph.add_reference("doc-1", "doc-2", "space-1")
        graph.add_reference("doc-3", "doc-4", "space-1")
        graph.add_reference("other", "doc", "space-2")

        graph.clear_space("space-1")

        assert graph.get_documents_in_space("space-1") == set()
        assert graph.get_documents_in_space("space-2") == {"other", "doc"}
class TestRenderCache:
    """Tests for RenderCache.

    In these tests ``put`` is called as
    ``put(doc_id, space_id, content_hash, rendered_content)``, optionally with
    a ``dependencies`` keyword.
    """

    def test_empty_cache(self):
        """A fresh cache has no entry for any document."""
        cache = RenderCache()

        assert cache.get("doc-1") is None

    def test_put_and_get(self):
        """A stored entry is retrievable with its content and hash intact."""
        cache = RenderCache()
        # The return value of put() is not needed here; it is exercised in
        # test_cache_with_dependencies.
        cache.put("doc-1", "space-1", "hash123", "<html>content</html>")

        retrieved = cache.get("doc-1")

        assert retrieved is not None
        assert retrieved.rendered_content == "<html>content</html>"
        assert retrieved.content_hash == "hash123"

    def test_is_valid(self):
        """is_valid() is True only for a cached document with a matching hash."""
        cache = RenderCache()
        cache.put("doc-1", "space-1", "hash123", "content")

        assert cache.is_valid("doc-1", "hash123") is True
        assert cache.is_valid("doc-1", "different_hash") is False
        assert cache.is_valid("non-existent", "hash123") is False

    def test_invalidate(self):
        """Invalidating an existing entry returns True and removes it."""
        cache = RenderCache()
        cache.put("doc-1", "space-1", "hash", "content")

        result = cache.invalidate("doc-1")

        assert result is True
        assert cache.get("doc-1") is None

    def test_invalidate_nonexistent(self):
        """Invalidating an unknown document returns False."""
        cache = RenderCache()

        result = cache.invalidate("non-existent")

        assert result is False

    def test_invalidate_many(self):
        """invalidate_many() counts only the entries that actually existed."""
        cache = RenderCache()
        cache.put("doc-1", "space-1", "h1", "c1")
        cache.put("doc-2", "space-1", "h2", "c2")
        cache.put("doc-3", "space-1", "h3", "c3")

        # "doc-4" was never cached, so it must not contribute to the count.
        count = cache.invalidate_many({"doc-1", "doc-2", "doc-4"})

        assert count == 2
        assert cache.get("doc-1") is None
        assert cache.get("doc-2") is None
        assert cache.get("doc-3") is not None

    def test_invalidate_space(self):
        """invalidate_space() removes only the entries of the given space."""
        cache = RenderCache()
        cache.put("doc-1", "space-1", "h1", "c1")
        cache.put("doc-2", "space-1", "h2", "c2")
        cache.put("doc-3", "space-2", "h3", "c3")

        count = cache.invalidate_space("space-1")

        assert count == 2
        assert cache.get("doc-1") is None
        assert cache.get("doc-2") is None
        assert cache.get("doc-3") is not None

    def test_get_cached_documents(self):
        """get_cached_documents() returns the cached ids of one space."""
        cache = RenderCache()
        cache.put("doc-1", "space-1", "h1", "c1")
        cache.put("doc-2", "space-1", "h2", "c2")
        cache.put("doc-3", "space-2", "h3", "c3")

        docs = cache.get_cached_documents("space-1")

        assert docs == {"doc-1", "doc-2"}

    def test_clear(self):
        """clear() removes every entry and returns how many were removed."""
        cache = RenderCache()
        cache.put("doc-1", "space-1", "h1", "c1")
        cache.put("doc-2", "space-2", "h2", "c2")

        count = cache.clear()

        assert count == 2
        assert cache.get("doc-1") is None
        assert cache.get("doc-2") is None

    def test_cache_with_dependencies(self):
        """The entry returned by put() carries the supplied dependencies."""
        cache = RenderCache()

        entry = cache.put(
            "doc-1",
            "space-1",
            "hash",
            "content",
            dependencies={"comp-a", "comp-b"},
        )

        assert entry.dependencies == {"comp-a", "comp-b"}
class TestCacheInvalidator:
"""Tests for CacheInvalidator."""
def test_basic_invalidation(self):
    """Invalidating a cached document reports it and evicts its entry."""
    render_cache = RenderCache()
    ref_graph = ReferenceGraph()
    render_cache.put("doc-1", "space-1", "h1", "c1")
    invalidator = CacheInvalidator(render_cache, ref_graph)

    evicted = invalidator.invalidate_for_document("doc-1", "space-1")

    assert "doc-1" in evicted
    leftover = render_cache.get("doc-1")
    assert leftover is None
def test_invalidation_with_dependents(self):
    """Invalidating a shared document also reports the documents using it."""
    render_cache = RenderCache()
    ref_graph = ReferenceGraph()

    # doc-1 and doc-2 both reference "shared".
    for dependent in ("doc-1", "doc-2"):
        ref_graph.add_reference(dependent, "shared", "space-1")

    cached_entries = (
        ("doc-1", "h1", "c1"),
        ("doc-2", "h2", "c2"),
        ("shared", "hs", "cs"),
    )
    for doc_id, content_hash, rendered in cached_entries:
        render_cache.put(doc_id, "space-1", content_hash, rendered)

    invalidator = CacheInvalidator(render_cache, ref_graph, transitive=True)
    evicted = invalidator.invalidate_for_document("shared", "space-1")

    for doc_id in ("shared", "doc-1", "doc-2"):
        assert doc_id in evicted
def test_invalidation_non_transitive(self):
    """With transitive=False only direct dependents are reported."""
    render_cache = RenderCache()
    ref_graph = ReferenceGraph()

    # Chain: doc-2 -> doc-1 -> shared
    edges = (
        ("doc-1", "shared"),
        ("doc-2", "doc-1"),
    )
    for source, target in edges:
        ref_graph.add_reference(source, target, "space-1")

    for doc_id, content_hash, rendered in (
        ("doc-1", "h1", "c1"),
        ("doc-2", "h2", "c2"),
    ):
        render_cache.put(doc_id, "space-1", content_hash, rendered)

    invalidator = CacheInvalidator(render_cache, ref_graph, transitive=False)
    evicted = invalidator.invalidate_for_document("shared", "space-1")

    # doc-1 references "shared" directly; doc-2 only reaches it through doc-1.
    assert "doc-1" in evicted
    assert "doc-2" not in evicted
def test_event_subscription(self):
    """Test that a content-changed event on the bus evicts the cached entry.

    The invalidator is constructed with an event bus; emitting a
    document-content-changed event for ``doc-1`` must leave no cache entry
    for that document.
    """
    # Kept as a function-local import, as in the original test.
    from markitect.spaces.events import document_content_changed_event

    cache = RenderCache()
    graph = ReferenceGraph()
    bus = EventBus()
    cache.put("doc-1", "space-1", "h1", "c1")
    invalidator = CacheInvalidator(cache, graph, event_bus=bus)

    try:
        # Emit content changed event.
        # NOTE(review): arguments look like (space_id, document_id, old_hash,
        # new_hash) - confirm against markitect.spaces.events.
        bus.emit(document_content_changed_event("space-1", "doc-1", "h1", "h2"))

        # Cache should be invalidated
        assert cache.get("doc-1") is None
    finally:
        # Always detach from the bus, even when the assertion above fails,
        # so a failing run does not leave the handler subscribed.
        invalidator.unsubscribe()
def test_invalidate_all(self):
    """invalidate_all() evicts every cached entry of the space and counts them."""
    render_cache = RenderCache()
    ref_graph = ReferenceGraph()
    cached_entries = (
        ("doc-1", "h1", "c1"),
        ("doc-2", "h2", "c2"),
    )
    for doc_id, content_hash, rendered in cached_entries:
        render_cache.put(doc_id, "space-1", content_hash, rendered)
    invalidator = CacheInvalidator(render_cache, ref_graph)

    evicted_count = invalidator.invalidate_all("space-1")

    assert evicted_count == 2
    for doc_id, _content_hash, _rendered in cached_entries:
        assert render_cache.get(doc_id) is None