fix: Add missing infrastructure files from data access improvements

Add infrastructure components that were created during issue #24 but not properly committed: - Data access repositories and interfaces - Connection management infrastructure - Exception handling framework - Configuration management - Documentation from data access pattern improvements These files are essential infrastructure components that enable the repository pattern and improved data access strategies. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-27 08:35:34 +02:00
parent 398c45d71c
commit f782ac1f69
8 changed files with 3819 additions and 0 deletions
--- a/infrastructure/repositories/filesystem_repository.py
+++ b/infrastructure/repositories/filesystem_repository.py
@@ -0,0 +1,495 @@
+"""
+Filesystem repository implementation with atomic operations.
+
+Provides reliable file operations with proper error handling,
+atomic writes, and workspace management.
+"""
+
+import os
+import shutil
+import tempfile
+import uuid
+from infrastructure.logging import get_logger
+from typing import List, Optional
+from pathlib import Path
+from datetime import datetime, timedelta
+
+from infrastructure.repositories.interfaces import WorkspaceRepository
+from infrastructure.exceptions import (
+    ErrorContext, OperationType, ResourceNotFoundError,
+    DuplicateResourceError, ValidationError
+)
+
+logger = get_logger(__name__)
+
+
+class FilesystemWorkspaceRepository(WorkspaceRepository):
+    """
+    Filesystem implementation of WorkspaceRepository.
+
+    Provides reliable workspace and file operations with atomic writes,
+    proper validation, and comprehensive error handling.
+    """
+
+    def __init__(self, base_workspace_dir: str = ".markitect_workspace"):
+        self.base_path = Path(base_workspace_dir).resolve()
+        self.base_path.mkdir(parents=True, exist_ok=True)
+        logger.info(f"Initialized workspace repository at {self.base_path}")
+
+    async def create_workspace(
+        self,
+        workspace_id: str,
+        base_path: Path,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """Create a new workspace directory."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"create_workspace_{workspace_id}",
+                operation_type=OperationType.WRITE,
+                resource_type="Workspace",
+                resource_id=workspace_id
+            )
+
+        # Validate workspace ID
+        if not self._is_valid_workspace_id(workspace_id):
+            raise ValidationError(
+                "workspace_id",
+                workspace_id,
+                "Workspace ID must be alphanumeric with optional dashes and underscores",
+                context
+            )
+
+        workspace_path = self.base_path / workspace_id
+
+        # Check if workspace already exists
+        if workspace_path.exists():
+            raise DuplicateResourceError("Workspace", workspace_id, context)
+
+        try:
+            # Create workspace directory with proper permissions
+            workspace_path.mkdir(parents=True, exist_ok=False, mode=0o755)
+
+            # Create standard subdirectories
+            (workspace_path / "files").mkdir(exist_ok=True)
+            (workspace_path / "temp").mkdir(exist_ok=True)
+            (workspace_path / "logs").mkdir(exist_ok=True)
+
+            # Create workspace metadata file
+            metadata = {
+                "id": workspace_id,
+                "created_at": datetime.utcnow().isoformat(),
+                "version": "1.0",
+                "type": "markitect_workspace"
+            }
+
+            await self._write_json_file(
+                workspace_path / ".workspace_meta.json",
+                metadata,
+                context
+            )
+
+            logger.info(f"Created workspace: {workspace_id}")
+            return workspace_path
+
+        except OSError as e:
+            logger.error(f"Failed to create workspace {workspace_id}: {e}")
+            # Cleanup partial creation
+            if workspace_path.exists():
+                shutil.rmtree(workspace_path, ignore_errors=True)
+
+            raise self._map_os_error_to_exception(e, f"create workspace {workspace_id}", context)
+
+    async def get_workspace_path(
+        self,
+        workspace_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """Get the path to a workspace."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_workspace_path_{workspace_id}",
+                operation_type=OperationType.READ,
+                resource_type="Workspace",
+                resource_id=workspace_id
+            )
+
+        workspace_path = self.base_path / workspace_id
+
+        if not workspace_path.exists() or not workspace_path.is_dir():
+            raise ResourceNotFoundError("Workspace", workspace_id, context)
+
+        return workspace_path
+
+    async def list_workspaces(
+        self,
+        context: Optional[ErrorContext] = None
+    ) -> List[str]:
+        """List all available workspaces."""
+        if context is None:
+            context = ErrorContext(
+                operation_id="list_workspaces",
+                operation_type=OperationType.READ,
+                resource_type="Workspace"
+            )
+
+        try:
+            workspaces = []
+
+            if not self.base_path.exists():
+                return workspaces
+
+            for item in self.base_path.iterdir():
+                if item.is_dir() and self._is_valid_workspace_id(item.name):
+                    # Verify it's a valid workspace by checking for metadata
+                    metadata_file = item / ".workspace_meta.json"
+                    if metadata_file.exists():
+                        workspaces.append(item.name)
+
+            return sorted(workspaces)
+
+        except OSError as e:
+            logger.error(f"Failed to list workspaces: {e}")
+            raise self._map_os_error_to_exception(e, "list workspaces", context)
+
+    async def write_file(
+        self,
+        workspace_id: str,
+        file_path: str,
+        content: str,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """Write content to a file in the workspace using atomic operations."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"write_file_{workspace_id}_{file_path}",
+                operation_type=OperationType.WRITE,
+                resource_type="WorkspaceFile",
+                resource_id=f"{workspace_id}/{file_path}",
+                request_data={"content_length": len(content)}
+            )
+
+        # Validate inputs
+        workspace_path = await self.get_workspace_path(workspace_id, context)
+
+        if not self._is_safe_file_path(file_path):
+            raise ValidationError(
+                "file_path",
+                file_path,
+                "File path contains invalid characters or attempts directory traversal",
+                context
+            )
+
+        # Validate file extension
+        allowed_extensions = {".md", ".txt", ".py", ".js", ".json", ".yaml", ".yml", ".rst", ".csv"}
+        file_ext = Path(file_path).suffix.lower()
+        if file_ext and file_ext not in allowed_extensions:
+            raise ValidationError(
+                "file_path",
+                file_path,
+                f"File extension {file_ext} is not allowed",
+                context
+            )
+
+        # Validate content size (100MB limit)
+        max_size = 100 * 1024 * 1024  # 100MB
+        if len(content.encode('utf-8')) > max_size:
+            raise ValidationError(
+                "content",
+                f"{len(content)} characters",
+                f"File content exceeds maximum size of {max_size} bytes",
+                context
+            )
+
+        target_path = workspace_path / "files" / file_path
+
+        try:
+            # Ensure parent directory exists
+            target_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Atomic write using temporary file
+            await self._atomic_write_file(target_path, content, context)
+
+            logger.info(f"Wrote file {file_path} in workspace {workspace_id}")
+            return target_path
+
+        except OSError as e:
+            logger.error(f"Failed to write file {file_path} in workspace {workspace_id}: {e}")
+            raise self._map_os_error_to_exception(e, f"write file {file_path}", context)
+
+    async def read_file(
+        self,
+        workspace_id: str,
+        file_path: str,
+        context: Optional[ErrorContext] = None
+    ) -> str:
+        """Read content from a file in the workspace."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"read_file_{workspace_id}_{file_path}",
+                operation_type=OperationType.READ,
+                resource_type="WorkspaceFile",
+                resource_id=f"{workspace_id}/{file_path}"
+            )
+
+        # Validate inputs
+        workspace_path = await self.get_workspace_path(workspace_id, context)
+
+        if not self._is_safe_file_path(file_path):
+            raise ValidationError(
+                "file_path",
+                file_path,
+                "File path contains invalid characters or attempts directory traversal",
+                context
+            )
+
+        target_path = workspace_path / "files" / file_path
+
+        if not target_path.exists():
+            raise ResourceNotFoundError("File", f"{workspace_id}/{file_path}", context)
+
+        if not target_path.is_file():
+            raise ValidationError(
+                "file_path",
+                file_path,
+                "Path exists but is not a regular file",
+                context
+            )
+
+        try:
+            # Read file with encoding detection
+            content = target_path.read_text(encoding='utf-8')
+
+            logger.debug(f"Read file {file_path} from workspace {workspace_id}")
+            return content
+
+        except UnicodeDecodeError as e:
+            logger.error(f"Failed to decode file {file_path} as UTF-8: {e}")
+            raise ValidationError(
+                "file_content",
+                "binary data",
+                "File does not contain valid UTF-8 text",
+                context
+            )
+
+        except OSError as e:
+            logger.error(f"Failed to read file {file_path} from workspace {workspace_id}: {e}")
+            raise self._map_os_error_to_exception(e, f"read file {file_path}", context)
+
+    async def delete_workspace(
+        self,
+        workspace_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """Delete a workspace and all its contents."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"delete_workspace_{workspace_id}",
+                operation_type=OperationType.DELETE,
+                resource_type="Workspace",
+                resource_id=workspace_id
+            )
+
+        workspace_path = await self.get_workspace_path(workspace_id, context)
+
+        try:
+            # Use shutil.rmtree for recursive deletion
+            shutil.rmtree(workspace_path)
+
+            logger.info(f"Deleted workspace: {workspace_id}")
+            return True
+
+        except OSError as e:
+            logger.error(f"Failed to delete workspace {workspace_id}: {e}")
+            raise self._map_os_error_to_exception(e, f"delete workspace {workspace_id}", context)
+
+    async def list_files(
+        self,
+        workspace_id: str,
+        pattern: Optional[str] = None,
+        context: Optional[ErrorContext] = None
+    ) -> List[str]:
+        """List files in a workspace."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"list_files_{workspace_id}",
+                operation_type=OperationType.READ,
+                resource_type="WorkspaceFile",
+                metadata={"workspace_id": workspace_id, "pattern": pattern}
+            )
+
+        workspace_path = await self.get_workspace_path(workspace_id, context)
+        files_dir = workspace_path / "files"
+
+        if not files_dir.exists():
+            return []
+
+        try:
+            files = []
+
+            # Walk through all files in the workspace
+            for item in files_dir.rglob("*"):
+                if item.is_file():
+                    # Get relative path from files directory
+                    relative_path = str(item.relative_to(files_dir))
+
+                    # Apply pattern filter if provided
+                    if pattern is None or self._matches_pattern(relative_path, pattern):
+                        files.append(relative_path)
+
+            return sorted(files)
+
+        except OSError as e:
+            logger.error(f"Failed to list files in workspace {workspace_id}: {e}")
+            raise self._map_os_error_to_exception(e, f"list files in workspace {workspace_id}", context)
+
+    async def cleanup_old_workspaces(self, days_threshold: int = 30) -> int:
+        """Clean up workspaces older than specified days."""
+        logger.info(f"Starting cleanup of workspaces older than {days_threshold} days")
+
+        try:
+            cutoff_date = datetime.utcnow() - timedelta(days=days_threshold)
+            deleted_count = 0
+
+            if not self.base_path.exists():
+                return 0
+
+            for workspace_dir in self.base_path.iterdir():
+                if not workspace_dir.is_dir():
+                    continue
+
+                try:
+                    # Check workspace metadata for creation date
+                    metadata_file = workspace_dir / ".workspace_meta.json"
+                    if not metadata_file.exists():
+                        continue
+
+                    metadata = await self._read_json_file(metadata_file)
+                    created_at_str = metadata.get("created_at")
+
+                    if not created_at_str:
+                        continue
+
+                    created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
+
+                    if created_at < cutoff_date:
+                        await self.delete_workspace(workspace_dir.name)
+                        deleted_count += 1
+                        logger.info(f"Cleaned up old workspace: {workspace_dir.name}")
+
+                except Exception as e:
+                    logger.warning(f"Failed to process workspace {workspace_dir.name} during cleanup: {e}")
+                    continue
+
+            logger.info(f"Cleanup completed: deleted {deleted_count} old workspaces")
+            return deleted_count
+
+        except Exception as e:
+            logger.error(f"Error during workspace cleanup: {e}")
+            return 0
+
+    # Helper methods
+
+    def _is_valid_workspace_id(self, workspace_id: str) -> bool:
+        """Validate workspace ID format."""
+        if not workspace_id or len(workspace_id) > 100:
+            return False
+
+        # Allow alphanumeric, dash, underscore
+        import re
+        return re.match(r'^[a-zA-Z0-9_-]+$', workspace_id) is not None
+
+    def _is_safe_file_path(self, file_path: str) -> bool:
+        """Check if file path is safe (no directory traversal)."""
+        if not file_path:
+            return False
+
+        # Normalize path
+        normalized = os.path.normpath(file_path)
+
+        # Check for directory traversal attempts
+        if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized:
+            return False
+
+        # Check for absolute paths
+        if os.path.isabs(normalized):
+            return False
+
+        # Check for unsafe characters
+        unsafe_chars = {"<", ">", ":", "\"", "|", "?", "*", "\0"}
+        if any(char in file_path for char in unsafe_chars):
+            return False
+
+        return True
+
+    def _matches_pattern(self, file_path: str, pattern: str) -> bool:
+        """Check if file path matches the given pattern."""
+        import fnmatch
+        return fnmatch.fnmatch(file_path.lower(), pattern.lower())
+
+    async def _atomic_write_file(self, target_path: Path, content: str, context: ErrorContext):
+        """Write file atomically using temporary file."""
+        temp_dir = target_path.parent / ".tmp"
+        temp_dir.mkdir(exist_ok=True)
+
+        # Create temporary file in same directory as target
+        temp_fd, temp_path = tempfile.mkstemp(
+            dir=temp_dir,
+            prefix=f".tmp_{target_path.name}_",
+            suffix=".tmp"
+        )
+
+        try:
+            # Write content to temporary file
+            with os.fdopen(temp_fd, 'w', encoding='utf-8') as f:
+                f.write(content)
+                f.flush()
+                os.fsync(f.fileno())  # Ensure data is written to disk
+
+            # Atomic move to final location
+            temp_path_obj = Path(temp_path)
+            temp_path_obj.replace(target_path)
+
+        except Exception:
+            # Clean up temporary file on error
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                pass
+            raise
+
+        finally:
+            # Clean up temp directory if empty
+            try:
+                temp_dir.rmdir()
+            except OSError:
+                pass  # Directory not empty or doesn't exist
+
+    async def _write_json_file(self, file_path: Path, data: dict, context: Optional[ErrorContext] = None):
+        """Write JSON data to file atomically."""
+        import json
+        json_content = json.dumps(data, indent=2)
+        await self._atomic_write_file(file_path, json_content, context)
+
+    async def _read_json_file(self, file_path: Path) -> dict:
+        """Read JSON data from file."""
+        import json
+        content = file_path.read_text(encoding='utf-8')
+        return json.loads(content)
+
+    def _map_os_error_to_exception(self, os_error: OSError, operation: str, context: ErrorContext):
+        """Map OS errors to appropriate domain exceptions."""
+        from infrastructure.exceptions import (
+            ResourceNotFoundError, ValidationError, DatabaseError
+        )
+
+        if os_error.errno == 2:  # No such file or directory
+            return ResourceNotFoundError("File", operation, context)
+        elif os_error.errno == 13:  # Permission denied
+            return ValidationError("permissions", operation, "Permission denied", context)
+        elif os_error.errno == 28:  # No space left on device
+            return DatabaseError(f"Insufficient disk space for {operation}", os_error, context)
+        elif os_error.errno == 17:  # File exists
+            return DuplicateResourceError("File", operation, context)
+        else:
+            return DatabaseError(f"Filesystem error during {operation}", os_error, context)
--- a/infrastructure/repositories/gitea_repository.py
+++ b/infrastructure/repositories/gitea_repository.py
@@ -0,0 +1,618 @@
+"""
+Gitea repository implementation with async HTTP client.
+
+Provides high-performance, reliable access to Gitea API with connection pooling,
+retry mechanisms, and proper error handling.
+"""
+
+import asyncio
+import json
+from infrastructure.logging import get_logger
+from typing import List, Optional, Dict, Any
+from datetime import datetime
+
+import aiohttp
+
+from domain.issues.models import Issue, Label, IssueState
+from domain.projects.models import Project, Milestone, ProjectState
+from infrastructure.repositories.interfaces import IssueRepository, ProjectRepository
+from infrastructure.connection_manager import ConnectionManager, retry_with_backoff, RetryConfig
+from infrastructure.exceptions import (
+    ErrorContext, OperationType, GiteaApiError, NetworkError,
+    ResourceNotFoundError, ValidationError, ConcurrencyError
+)
+
+logger = get_logger(__name__)
+
+
+class GiteaIssueRepository(IssueRepository):
+    """
+    Gitea implementation of IssueRepository using async HTTP client.
+
+    Provides efficient access to Gitea issues API with connection pooling,
+    automatic retries, and proper error handling.
+    """
+
+    def __init__(self, connection_manager: ConnectionManager, retry_config: Optional[RetryConfig] = None):
+        self.connection_manager = connection_manager
+        self.retry_config = retry_config or RetryConfig()
+
+    @retry_with_backoff(RetryConfig())
+    async def get_issue(self, issue_number: int, context: Optional[ErrorContext] = None) -> Issue:
+        """Retrieve an issue by its number from Gitea API."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_issue_{issue_number}",
+                operation_type=OperationType.READ,
+                resource_type="Issue",
+                resource_id=str(issue_number)
+            )
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            async with session.get(f"/api/v1/repos/issues/{issue_number}") as response:
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                return self._map_api_issue_to_domain(data)
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error getting issue {issue_number}: {e}")
+            raise NetworkError(f"get issue {issue_number}", e, context)
+
+    @retry_with_backoff(RetryConfig())
+    async def get_issues(
+        self,
+        project_id: Optional[str] = None,
+        state: Optional[str] = None,
+        labels: Optional[List[str]] = None,
+        limit: int = 100,
+        offset: int = 0,
+        context: Optional[ErrorContext] = None
+    ) -> List[Issue]:
+        """Retrieve multiple issues with filtering and pagination."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_issues_{project_id or 'all'}",
+                operation_type=OperationType.READ,
+                resource_type="Issue",
+                metadata={
+                    "project_id": project_id,
+                    "state": state,
+                    "labels": labels,
+                    "limit": limit,
+                    "offset": offset
+                }
+            )
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            # Build query parameters
+            params = {
+                "limit": limit,
+                "page": (offset // limit) + 1  # Gitea uses 1-based pagination
+            }
+
+            if state:
+                params["state"] = state
+
+            if labels:
+                params["labels"] = ",".join(labels)
+
+            async with session.get("/api/v1/repos/issues", params=params) as response:
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                return [self._map_api_issue_to_domain(issue_data) for issue_data in data]
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error getting issues: {e}")
+            raise NetworkError("get issues", e, context)
+
+    @retry_with_backoff(RetryConfig())
+    async def create_issue(
+        self,
+        title: str,
+        body: str,
+        labels: Optional[List[str]] = None,
+        assignees: Optional[List[str]] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Issue:
+        """Create a new issue via Gitea API."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"create_issue_{title[:50]}",
+                operation_type=OperationType.WRITE,
+                resource_type="Issue",
+                request_data={
+                    "title": title,
+                    "body": body,
+                    "labels": labels,
+                    "assignees": assignees
+                }
+            )
+
+        # Validate input
+        if not title or not title.strip():
+            raise ValidationError("title", title, "Title cannot be empty", context)
+
+        if len(title) > 255:
+            raise ValidationError("title", title, "Title cannot exceed 255 characters", context)
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            # Prepare request payload
+            payload = {
+                "title": title.strip(),
+                "body": body or ""
+            }
+
+            if labels:
+                payload["labels"] = labels
+
+            if assignees:
+                payload["assignees"] = assignees
+
+            async with session.post("/api/v1/repos/issues", json=payload) as response:
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                created_issue = self._map_api_issue_to_domain(data)
+
+                logger.info(f"Created issue #{created_issue.number}: {title}")
+                return created_issue
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error creating issue '{title}': {e}")
+            raise NetworkError(f"create issue '{title}'", e, context)
+
+    @retry_with_backoff(RetryConfig())
+    async def update_issue(
+        self,
+        issue_number: int,
+        title: Optional[str] = None,
+        body: Optional[str] = None,
+        state: Optional[str] = None,
+        labels: Optional[List[str]] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Issue:
+        """Update an existing issue via Gitea API."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"update_issue_{issue_number}",
+                operation_type=OperationType.UPDATE,
+                resource_type="Issue",
+                resource_id=str(issue_number),
+                request_data={
+                    "title": title,
+                    "body": body,
+                    "state": state,
+                    "labels": labels
+                }
+            )
+
+        # Validate input
+        if title is not None:
+            if not title.strip():
+                raise ValidationError("title", title, "Title cannot be empty", context)
+            if len(title) > 255:
+                raise ValidationError("title", title, "Title cannot exceed 255 characters", context)
+
+        if state is not None and state not in ["open", "closed"]:
+            raise ValidationError("state", state, "State must be 'open' or 'closed'", context)
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            # First, get current issue to check for concurrent modifications
+            current_issue = await self.get_issue(issue_number, context)
+
+            # Prepare update payload
+            payload = {}
+
+            if title is not None:
+                payload["title"] = title.strip()
+
+            if body is not None:
+                payload["body"] = body
+
+            if state is not None:
+                payload["state"] = state
+
+            if labels is not None:
+                payload["labels"] = labels
+
+            # Only update if there are changes
+            if not payload:
+                return current_issue
+
+            async with session.patch(f"/api/v1/repos/issues/{issue_number}", json=payload) as response:
+                # Handle potential concurrent modification
+                if response.status == 409:
+                    raise ConcurrencyError("Issue", str(issue_number), context)
+
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                updated_issue = self._map_api_issue_to_domain(data)
+
+                logger.info(f"Updated issue #{issue_number}")
+                return updated_issue
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error updating issue {issue_number}: {e}")
+            raise NetworkError(f"update issue {issue_number}", e, context)
+
+    async def get_issue_project_info(
+        self,
+        issue_number: int,
+        context: Optional[ErrorContext] = None
+    ) -> Dict[str, Any]:
+        """Get project-related information for an issue."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_issue_project_info_{issue_number}",
+                operation_type=OperationType.READ,
+                resource_type="ProjectInfo",
+                resource_id=str(issue_number)
+            )
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            # Get issue details first
+            issue = await self.get_issue(issue_number, context)
+
+            # Get repository information
+            async with session.get("/api/v1/repos") as response:
+                await self._handle_response_errors(response, context)
+                repo_data = await response.json()
+
+            # Get project boards if available
+            project_info = {
+                "repository": repo_data,
+                "kanban_columns": ["Todo", "In Progress", "Review", "Done"],  # Default columns
+                "issue": {
+                    "number": issue.number,
+                    "title": issue.title,
+                    "state": issue.state.value,
+                    "labels": [label.name for label in issue.labels]
+                }
+            }
+
+            # Try to get actual project boards
+            try:
+                async with session.get("/api/v1/repos/projects") as projects_response:
+                    if projects_response.status == 200:
+                        projects_data = await projects_response.json()
+                        if projects_data:
+                            # Use first project's columns if available
+                            project_info["projects"] = projects_data
+            except Exception:
+                # Projects API might not be available, use defaults
+                pass
+
+            return project_info
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error getting project info for issue {issue_number}: {e}")
+            raise NetworkError(f"get project info for issue {issue_number}", e, context)
+
+    def _map_api_issue_to_domain(self, api_data: Dict[str, Any]) -> Issue:
+        """Map Gitea API issue data to domain Issue object."""
+        # Map labels
+        labels = []
+        if "labels" in api_data:
+            for label_data in api_data["labels"]:
+                label = Label(
+                    name=label_data["name"],
+                    color=label_data.get("color", ""),
+                    description=label_data.get("description", "")
+                )
+                labels.append(label)
+
+        # Map state
+        state_value = api_data.get("state", "open")
+        issue_state = IssueState.OPEN if state_value == "open" else IssueState.CLOSED
+
+        # Parse dates
+        created_at = datetime.fromisoformat(api_data["created_at"].replace("Z", "+00:00"))
+        updated_at = datetime.fromisoformat(api_data["updated_at"].replace("Z", "+00:00"))
+
+        closed_at = None
+        if api_data.get("closed_at"):
+            closed_at = datetime.fromisoformat(api_data["closed_at"].replace("Z", "+00:00"))
+
+        return Issue(
+            number=api_data["number"],
+            title=api_data["title"],
+            body=api_data.get("body", ""),
+            state=issue_state,
+            labels=labels,
+            assignees=api_data.get("assignees", []),
+            author=api_data.get("user", {}).get("login", "unknown"),
+            created_at=created_at,
+            updated_at=updated_at,
+            closed_at=closed_at,
+            url=api_data.get("html_url", "")
+        )
+
+    async def _handle_response_errors(self, response: aiohttp.ClientResponse, context: ErrorContext):
+        """Handle HTTP response errors and convert to appropriate exceptions."""
+        if response.status == 200 or response.status == 201:
+            return
+
+        response_text = await response.text()
+
+        if response.status == 404:
+            resource_id = context.resource_id or "unknown"
+            raise ResourceNotFoundError(context.resource_type, resource_id, context)
+
+        elif response.status == 401:
+            raise GiteaApiError(
+                response.status,
+                "Authentication failed - check API token",
+                str(response.url),
+                context
+            )
+
+        elif response.status == 403:
+            raise GiteaApiError(
+                response.status,
+                "Access forbidden - check API permissions",
+                str(response.url),
+                context
+            )
+
+        elif response.status == 409:
+            # Conflict - usually concurrent modification
+            raise ConcurrencyError(context.resource_type, context.resource_id or "unknown", context)
+
+        elif response.status == 422:
+            # Validation error
+            try:
+                error_data = await response.json()
+                error_message = error_data.get("message", response_text)
+            except:
+                error_message = response_text
+
+            raise ValidationError("request", None, error_message, context)
+
+        elif response.status >= 500:
+            raise GiteaApiError(
+                response.status,
+                f"Server error: {response_text}",
+                str(response.url),
+                context
+            )
+
+        else:
+            raise GiteaApiError(
+                response.status,
+                response_text,
+                str(response.url),
+                context
+            )
+
+
+class GiteaProjectRepository(ProjectRepository):
+    """
+    Gitea implementation of ProjectRepository.
+
+    Provides access to project and milestone information via Gitea API.
+    """
+
+    def __init__(self, connection_manager: ConnectionManager, retry_config: Optional[RetryConfig] = None):
+        self.connection_manager = connection_manager
+        self.retry_config = retry_config or RetryConfig()
+
+    @retry_with_backoff(RetryConfig())
+    async def get_project(self, project_id: str, context: Optional[ErrorContext] = None) -> Project:
+        """Retrieve a project by its ID from Gitea API."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_project_{project_id}",
+                operation_type=OperationType.READ,
+                resource_type="Project",
+                resource_id=project_id
+            )
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            async with session.get(f"/api/v1/repos/projects/{project_id}") as response:
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                return self._map_api_project_to_domain(data)
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error getting project {project_id}: {e}")
+            raise NetworkError(f"get project {project_id}", e, context)
+
+    @retry_with_backoff(RetryConfig())
+    async def get_projects(
+        self,
+        organization: Optional[str] = None,
+        limit: int = 100,
+        offset: int = 0,
+        context: Optional[ErrorContext] = None
+    ) -> List[Project]:
+        """Retrieve multiple projects with pagination."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_projects_{organization or 'all'}",
+                operation_type=OperationType.READ,
+                resource_type="Project",
+                metadata={
+                    "organization": organization,
+                    "limit": limit,
+                    "offset": offset
+                }
+            )
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            params = {
+                "limit": limit,
+                "page": (offset // limit) + 1
+            }
+
+            endpoint = "/api/v1/repos/projects"
+            if organization:
+                endpoint = f"/api/v1/orgs/{organization}/projects"
+
+            async with session.get(endpoint, params=params) as response:
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                return [self._map_api_project_to_domain(project_data) for project_data in data]
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error getting projects: {e}")
+            raise NetworkError("get projects", e, context)
+
+    @retry_with_backoff(RetryConfig())
+    async def get_milestones(
+        self,
+        project_id: str,
+        state: Optional[str] = None,
+        context: Optional[ErrorContext] = None
+    ) -> List[Milestone]:
+        """Retrieve milestones for a project."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_milestones_{project_id}",
+                operation_type=OperationType.READ,
+                resource_type="Milestone",
+                metadata={"project_id": project_id, "state": state}
+            )
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            params = {}
+            if state:
+                params["state"] = state
+
+            async with session.get(f"/api/v1/repos/milestones", params=params) as response:
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                return [self._map_api_milestone_to_domain(milestone_data) for milestone_data in data]
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error getting milestones for project {project_id}: {e}")
+            raise NetworkError(f"get milestones for project {project_id}", e, context)
+
+    @retry_with_backoff(RetryConfig())
+    async def create_milestone(
+        self,
+        project_id: str,
+        title: str,
+        description: str,
+        due_date: Optional[str] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Milestone:
+        """Create a new milestone for a project."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"create_milestone_{title[:50]}",
+                operation_type=OperationType.WRITE,
+                resource_type="Milestone",
+                request_data={
+                    "project_id": project_id,
+                    "title": title,
+                    "description": description,
+                    "due_date": due_date
+                }
+            )
+
+        # Validate input
+        if not title or not title.strip():
+            raise ValidationError("title", title, "Milestone title cannot be empty", context)
+
+        try:
+            session = await self.connection_manager.get_http_session()
+
+            payload = {
+                "title": title.strip(),
+                "description": description or ""
+            }
+
+            if due_date:
+                payload["due_on"] = due_date
+
+            async with session.post("/api/v1/repos/milestones", json=payload) as response:
+                await self._handle_response_errors(response, context)
+
+                data = await response.json()
+                created_milestone = self._map_api_milestone_to_domain(data)
+
+                logger.info(f"Created milestone: {title}")
+                return created_milestone
+
+        except aiohttp.ClientError as e:
+            logger.error(f"Network error creating milestone '{title}': {e}")
+            raise NetworkError(f"create milestone '{title}'", e, context)
+
+    def _map_api_project_to_domain(self, api_data: Dict[str, Any]) -> Project:
+        """Map Gitea API project data to domain Project object."""
+        # For now, create a basic project since Gitea projects API might be limited
+        created_at = datetime.fromisoformat(api_data.get("created_at", datetime.utcnow().isoformat()).replace("Z", "+00:00"))
+        updated_at = datetime.fromisoformat(api_data.get("updated_at", datetime.utcnow().isoformat()).replace("Z", "+00:00"))
+
+        return Project(
+            id=str(api_data.get("id", 0)),
+            name=api_data.get("title", api_data.get("name", "Unknown Project")),
+            description=api_data.get("body", api_data.get("description", "")),
+            state=ProjectState.ACTIVE,  # Default to active
+            milestones=[],  # Will be populated separately
+            created_at=created_at,
+            updated_at=updated_at
+        )
+
+    def _map_api_milestone_to_domain(self, api_data: Dict[str, Any]) -> Milestone:
+        """Map Gitea API milestone data to domain Milestone object."""
+        created_at = datetime.fromisoformat(api_data["created_at"].replace("Z", "+00:00"))
+        updated_at = datetime.fromisoformat(api_data["updated_at"].replace("Z", "+00:00"))
+
+        due_date = None
+        if api_data.get("due_on"):
+            due_date = datetime.fromisoformat(api_data["due_on"].replace("Z", "+00:00"))
+
+        return Milestone(
+            id=api_data["id"],
+            title=api_data["title"],
+            description=api_data.get("description", ""),
+            state=api_data.get("state", "open"),
+            open_issues=api_data.get("open_issues", 0),
+            closed_issues=api_data.get("closed_issues", 0),
+            due_date=due_date,
+            created_at=created_at,
+            updated_at=updated_at
+        )
+
+    async def _handle_response_errors(self, response: aiohttp.ClientResponse, context: ErrorContext):
+        """Handle HTTP response errors and convert to appropriate exceptions."""
+        # Reuse the same error handling logic from GiteaIssueRepository
+        if response.status == 200 or response.status == 201:
+            return
+
+        response_text = await response.text()
+
+        if response.status == 404:
+            resource_id = context.resource_id or "unknown"
+            raise ResourceNotFoundError(context.resource_type, resource_id, context)
+
+        elif response.status >= 400:
+            raise GiteaApiError(
+                response.status,
+                response_text,
+                str(response.url),
+                context
+            )
--- a/infrastructure/repositories/interfaces.py
+++ b/infrastructure/repositories/interfaces.py
@@ -0,0 +1,680 @@
+"""
+Abstract repository interfaces for data access patterns.
+
+Defines the contracts for data access operations across different
+data sources, enabling clean separation between business logic
+and infrastructure concerns.
+"""
+
+from abc import ABC, abstractmethod
+from typing import List, Optional, Dict, Any, AsyncContextManager
+from pathlib import Path
+
+from domain.issues.models import Issue
+from domain.projects.models import Project, Milestone
+from infrastructure.exceptions import ErrorContext
+
+
+class IssueRepository(ABC):
+    """Abstract repository for issue-related operations."""
+
+    @abstractmethod
+    async def get_issue(self, issue_number: int, context: Optional[ErrorContext] = None) -> Issue:
+        """
+        Retrieve an issue by its number.
+
+        Args:
+            issue_number: The issue number to retrieve
+            context: Error context for tracking operations
+
+        Returns:
+            Issue domain object
+
+        Raises:
+            ResourceNotFoundError: If issue doesn't exist
+            GiteaApiError: If API request fails
+            NetworkError: If network connectivity fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_issues(
+        self,
+        project_id: Optional[str] = None,
+        state: Optional[str] = None,
+        labels: Optional[List[str]] = None,
+        limit: int = 100,
+        offset: int = 0,
+        context: Optional[ErrorContext] = None
+    ) -> List[Issue]:
+        """
+        Retrieve multiple issues with filtering and pagination.
+
+        Args:
+            project_id: Filter by project ID
+            state: Filter by issue state (open, closed)
+            labels: Filter by labels
+            limit: Maximum number of issues to return
+            offset: Number of issues to skip
+            context: Error context for tracking operations
+
+        Returns:
+            List of Issue domain objects
+
+        Raises:
+            GiteaApiError: If API request fails
+            NetworkError: If network connectivity fails
+        """
+        pass
+
+    @abstractmethod
+    async def create_issue(
+        self,
+        title: str,
+        body: str,
+        labels: Optional[List[str]] = None,
+        assignees: Optional[List[str]] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Issue:
+        """
+        Create a new issue.
+
+        Args:
+            title: Issue title
+            body: Issue description
+            labels: List of label names
+            assignees: List of assignee usernames
+            context: Error context for tracking operations
+
+        Returns:
+            Created Issue domain object
+
+        Raises:
+            ValidationError: If input data is invalid
+            GiteaApiError: If API request fails
+            NetworkError: If network connectivity fails
+        """
+        pass
+
+    @abstractmethod
+    async def update_issue(
+        self,
+        issue_number: int,
+        title: Optional[str] = None,
+        body: Optional[str] = None,
+        state: Optional[str] = None,
+        labels: Optional[List[str]] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Issue:
+        """
+        Update an existing issue.
+
+        Args:
+            issue_number: Issue number to update
+            title: New title (if provided)
+            body: New body (if provided)
+            state: New state (if provided)
+            labels: New labels (if provided)
+            context: Error context for tracking operations
+
+        Returns:
+            Updated Issue domain object
+
+        Raises:
+            ResourceNotFoundError: If issue doesn't exist
+            ValidationError: If input data is invalid
+            GiteaApiError: If API request fails
+            ConcurrencyError: If issue was modified concurrently
+        """
+        pass
+
+    @abstractmethod
+    async def get_issue_project_info(
+        self,
+        issue_number: int,
+        context: Optional[ErrorContext] = None
+    ) -> Dict[str, Any]:
+        """
+        Get project-related information for an issue.
+
+        Args:
+            issue_number: Issue number
+            context: Error context for tracking operations
+
+        Returns:
+            Project information dictionary
+
+        Raises:
+            ResourceNotFoundError: If issue doesn't exist
+            GiteaApiError: If API request fails
+        """
+        pass
+
+
+class ProjectRepository(ABC):
+    """Abstract repository for project-related operations."""
+
+    @abstractmethod
+    async def get_project(self, project_id: str, context: Optional[ErrorContext] = None) -> Project:
+        """
+        Retrieve a project by its ID.
+
+        Args:
+            project_id: Project identifier
+            context: Error context for tracking operations
+
+        Returns:
+            Project domain object
+
+        Raises:
+            ResourceNotFoundError: If project doesn't exist
+            GiteaApiError: If API request fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_projects(
+        self,
+        organization: Optional[str] = None,
+        limit: int = 100,
+        offset: int = 0,
+        context: Optional[ErrorContext] = None
+    ) -> List[Project]:
+        """
+        Retrieve multiple projects with pagination.
+
+        Args:
+            organization: Filter by organization
+            limit: Maximum number of projects to return
+            offset: Number of projects to skip
+            context: Error context for tracking operations
+
+        Returns:
+            List of Project domain objects
+
+        Raises:
+            GiteaApiError: If API request fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_milestones(
+        self,
+        project_id: str,
+        state: Optional[str] = None,
+        context: Optional[ErrorContext] = None
+    ) -> List[Milestone]:
+        """
+        Retrieve milestones for a project.
+
+        Args:
+            project_id: Project identifier
+            state: Filter by milestone state
+            context: Error context for tracking operations
+
+        Returns:
+            List of Milestone domain objects
+
+        Raises:
+            ResourceNotFoundError: If project doesn't exist
+            GiteaApiError: If API request fails
+        """
+        pass
+
+    @abstractmethod
+    async def create_milestone(
+        self,
+        project_id: str,
+        title: str,
+        description: str,
+        due_date: Optional[str] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Milestone:
+        """
+        Create a new milestone for a project.
+
+        Args:
+            project_id: Project identifier
+            title: Milestone title
+            description: Milestone description
+            due_date: Due date (ISO format)
+            context: Error context for tracking operations
+
+        Returns:
+            Created Milestone domain object
+
+        Raises:
+            ResourceNotFoundError: If project doesn't exist
+            ValidationError: If input data is invalid
+            GiteaApiError: If API request fails
+        """
+        pass
+
+
+class DocumentRepository(ABC):
+    """Abstract repository for document storage and retrieval."""
+
+    @abstractmethod
+    async def store_document(
+        self,
+        filename: str,
+        content: str,
+        ast: Dict[str, Any],
+        context: Optional[ErrorContext] = None
+    ) -> str:
+        """
+        Store a document with its AST representation.
+
+        Args:
+            filename: Document filename
+            content: Document content
+            ast: Parsed AST representation
+            context: Error context for tracking operations
+
+        Returns:
+            Document ID
+
+        Raises:
+            ValidationError: If input data is invalid
+            DatabaseError: If storage operation fails
+            DuplicateResourceError: If document already exists
+        """
+        pass
+
+    @abstractmethod
+    async def get_document(
+        self,
+        document_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> Dict[str, Any]:
+        """
+        Retrieve a document by its ID.
+
+        Args:
+            document_id: Document identifier
+            context: Error context for tracking operations
+
+        Returns:
+            Document data dictionary
+
+        Raises:
+            ResourceNotFoundError: If document doesn't exist
+            DatabaseError: If retrieval operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_documents(
+        self,
+        filename_pattern: Optional[str] = None,
+        limit: int = 100,
+        offset: int = 0,
+        context: Optional[ErrorContext] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Retrieve multiple documents with filtering and pagination.
+
+        Args:
+            filename_pattern: Filter by filename pattern
+            limit: Maximum number of documents to return
+            offset: Number of documents to skip
+            context: Error context for tracking operations
+
+        Returns:
+            List of document data dictionaries
+
+        Raises:
+            DatabaseError: If retrieval operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def update_document(
+        self,
+        document_id: str,
+        content: Optional[str] = None,
+        ast: Optional[Dict[str, Any]] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Dict[str, Any]:
+        """
+        Update an existing document.
+
+        Args:
+            document_id: Document identifier
+            content: New content (if provided)
+            ast: New AST (if provided)
+            context: Error context for tracking operations
+
+        Returns:
+            Updated document data
+
+        Raises:
+            ResourceNotFoundError: If document doesn't exist
+            ValidationError: If input data is invalid
+            DatabaseError: If update operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def delete_document(
+        self,
+        document_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """
+        Delete a document.
+
+        Args:
+            document_id: Document identifier
+            context: Error context for tracking operations
+
+        Returns:
+            True if document was deleted
+
+        Raises:
+            ResourceNotFoundError: If document doesn't exist
+            DatabaseError: If deletion operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_cache_path(
+        self,
+        document_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """
+        Get the cache file path for a document.
+
+        Args:
+            document_id: Document identifier
+            context: Error context for tracking operations
+
+        Returns:
+            Path to cache file
+
+        Raises:
+            ResourceNotFoundError: If document doesn't exist
+        """
+        pass
+
+
+class WorkspaceRepository(ABC):
+    """Abstract repository for workspace file operations."""
+
+    @abstractmethod
+    async def create_workspace(
+        self,
+        workspace_id: str,
+        base_path: Path,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """
+        Create a new workspace directory.
+
+        Args:
+            workspace_id: Workspace identifier
+            base_path: Base directory for workspaces
+            context: Error context for tracking operations
+
+        Returns:
+            Path to created workspace
+
+        Raises:
+            DuplicateResourceError: If workspace already exists
+            ValidationError: If paths are invalid
+            FileSystemError: If directory creation fails
+        """
+        pass
+
+    @abstractmethod
+    async def get_workspace_path(
+        self,
+        workspace_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """
+        Get the path to a workspace.
+
+        Args:
+            workspace_id: Workspace identifier
+            context: Error context for tracking operations
+
+        Returns:
+            Path to workspace directory
+
+        Raises:
+            ResourceNotFoundError: If workspace doesn't exist
+        """
+        pass
+
+    @abstractmethod
+    async def list_workspaces(
+        self,
+        context: Optional[ErrorContext] = None
+    ) -> List[str]:
+        """
+        List all available workspaces.
+
+        Args:
+            context: Error context for tracking operations
+
+        Returns:
+            List of workspace identifiers
+
+        Raises:
+            FileSystemError: If directory listing fails
+        """
+        pass
+
+    @abstractmethod
+    async def write_file(
+        self,
+        workspace_id: str,
+        file_path: str,
+        content: str,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """
+        Write content to a file in the workspace.
+
+        Args:
+            workspace_id: Workspace identifier
+            file_path: Relative path within workspace
+            content: File content
+            context: Error context for tracking operations
+
+        Returns:
+            Full path to written file
+
+        Raises:
+            ResourceNotFoundError: If workspace doesn't exist
+            ValidationError: If file path is invalid
+            FileSystemError: If write operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def read_file(
+        self,
+        workspace_id: str,
+        file_path: str,
+        context: Optional[ErrorContext] = None
+    ) -> str:
+        """
+        Read content from a file in the workspace.
+
+        Args:
+            workspace_id: Workspace identifier
+            file_path: Relative path within workspace
+            context: Error context for tracking operations
+
+        Returns:
+            File content
+
+        Raises:
+            ResourceNotFoundError: If workspace or file doesn't exist
+            FileSystemError: If read operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def delete_workspace(
+        self,
+        workspace_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """
+        Delete a workspace and all its contents.
+
+        Args:
+            workspace_id: Workspace identifier
+            context: Error context for tracking operations
+
+        Returns:
+            True if workspace was deleted
+
+        Raises:
+            ResourceNotFoundError: If workspace doesn't exist
+            FileSystemError: If deletion fails
+        """
+        pass
+
+    @abstractmethod
+    async def list_files(
+        self,
+        workspace_id: str,
+        pattern: Optional[str] = None,
+        context: Optional[ErrorContext] = None
+    ) -> List[str]:
+        """
+        List files in a workspace.
+
+        Args:
+            workspace_id: Workspace identifier
+            pattern: File pattern to match
+            context: Error context for tracking operations
+
+        Returns:
+            List of relative file paths
+
+        Raises:
+            ResourceNotFoundError: If workspace doesn't exist
+            FileSystemError: If listing fails
+        """
+        pass
+
+
+class CacheRepository(ABC):
+    """Abstract repository for caching operations."""
+
+    @abstractmethod
+    async def get(
+        self,
+        key: str,
+        context: Optional[ErrorContext] = None
+    ) -> Optional[Any]:
+        """
+        Retrieve a value from cache.
+
+        Args:
+            key: Cache key
+            context: Error context for tracking operations
+
+        Returns:
+            Cached value or None if not found
+
+        Raises:
+            CacheError: If cache operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def set(
+        self,
+        key: str,
+        value: Any,
+        ttl: Optional[int] = None,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """
+        Store a value in cache.
+
+        Args:
+            key: Cache key
+            value: Value to cache
+            ttl: Time to live in seconds
+            context: Error context for tracking operations
+
+        Returns:
+            True if value was stored
+
+        Raises:
+            CacheError: If cache operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def delete(
+        self,
+        key: str,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """
+        Delete a value from cache.
+
+        Args:
+            key: Cache key
+            context: Error context for tracking operations
+
+        Returns:
+            True if value was deleted
+
+        Raises:
+            CacheError: If cache operation fails
+        """
+        pass
+
+    @abstractmethod
+    async def invalidate_pattern(
+        self,
+        pattern: str,
+        context: Optional[ErrorContext] = None
+    ) -> int:
+        """
+        Invalidate cache entries matching a pattern.
+
+        Args:
+            pattern: Pattern to match (e.g., "user:*")
+            context: Error context for tracking operations
+
+        Returns:
+            Number of invalidated entries
+
+        Raises:
+            CacheInvalidationError: If invalidation fails
+        """
+        pass
+
+    @abstractmethod
+    async def store_ast_cache(
+        self,
+        document_id: str,
+        ast: Dict[str, Any],
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """
+        Store AST cache for a document.
+
+        Args:
+            document_id: Document identifier
+            ast: AST representation
+            context: Error context for tracking operations
+
+        Returns:
+            True if cache was stored
+
+        Raises:
+            CacheError: If cache operation fails
+        """
+        pass
--- a/infrastructure/repositories/sqlite_repository.py
+++ b/infrastructure/repositories/sqlite_repository.py
@@ -0,0 +1,677 @@
+"""
+SQLite repository implementation with transaction support.
+
+Provides efficient database operations with connection pooling,
+transaction management, and proper error handling.
+"""
+
+import sqlite3
+import json
+import uuid
+from infrastructure.logging import get_logger
+from typing import List, Optional, Dict, Any
+from datetime import datetime
+from pathlib import Path
+from contextlib import asynccontextmanager
+
+from infrastructure.repositories.interfaces import DocumentRepository, CacheRepository
+from infrastructure.connection_manager import ConnectionManager
+from infrastructure.exceptions import (
+    ErrorContext, OperationType, DatabaseError, ConnectionError,
+    ResourceNotFoundError, DuplicateResourceError, ValidationError,
+    TransactionError, QueryError
+)
+
+logger = get_logger(__name__)
+
+
+class SqliteDocumentRepository(DocumentRepository):
+    """
+    SQLite implementation of DocumentRepository with transaction support.
+
+    Provides efficient document storage and retrieval with proper
+    transaction handling and optimized database operations.
+    """
+
+    def __init__(self, connection_manager: ConnectionManager):
+        self.connection_manager = connection_manager
+        self._initialize_schema()
+
+    def _initialize_schema(self):
+        """Initialize database schema for documents."""
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            # Create documents table
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS documents (
+                    id TEXT PRIMARY KEY,
+                    filename TEXT NOT NULL,
+                    content TEXT NOT NULL,
+                    ast_json TEXT NOT NULL,
+                    content_hash TEXT NOT NULL,
+                    file_size INTEGER NOT NULL,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    UNIQUE(filename, content_hash)
+                )
+            """)
+
+            # Create cache table
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS ast_cache (
+                    id TEXT PRIMARY KEY,
+                    document_id TEXT NOT NULL,
+                    cache_path TEXT NOT NULL,
+                    cache_size INTEGER NOT NULL,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
+                )
+            """)
+
+            # Create indexes for performance
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_filename ON documents(filename)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_document_id ON ast_cache(document_id)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed_at ON ast_cache(accessed_at)")
+
+            conn.commit()
+            logger.info("Database schema initialized successfully")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize database schema: {e}")
+            raise ConnectionError("markitect.db", e)
+
+    async def store_document(
+        self,
+        filename: str,
+        content: str,
+        ast: Dict[str, Any],
+        context: Optional[ErrorContext] = None
+    ) -> str:
+        """Store a document with its AST representation."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"store_document_{filename}",
+                operation_type=OperationType.WRITE,
+                resource_type="Document",
+                request_data={
+                    "filename": filename,
+                    "content_length": len(content),
+                    "ast_keys": list(ast.keys()) if ast else []
+                }
+            )
+
+        # Validate input
+        if not filename or not filename.strip():
+            raise ValidationError("filename", filename, "Filename cannot be empty", context)
+
+        if not content:
+            raise ValidationError("content", content, "Content cannot be empty", context)
+
+        if not ast:
+            raise ValidationError("ast", ast, "AST cannot be empty", context)
+
+        try:
+            async with self.connection_manager.transaction() as conn:
+                # Generate unique document ID
+                document_id = str(uuid.uuid4())
+
+                # Calculate content hash for deduplication
+                import hashlib
+                content_hash = hashlib.sha256(content.encode()).hexdigest()
+
+                # Check for duplicate content
+                cursor = conn.execute(
+                    "SELECT id FROM documents WHERE filename = ? AND content_hash = ?",
+                    (filename, content_hash)
+                )
+                existing = cursor.fetchone()
+
+                if existing:
+                    raise DuplicateResourceError("Document", filename, context)
+
+                # Store document
+                ast_json = json.dumps(ast)
+                file_size = len(content)
+                now = datetime.utcnow().isoformat()
+
+                conn.execute("""
+                    INSERT INTO documents (id, filename, content, ast_json, content_hash, file_size, created_at, updated_at)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                """, (document_id, filename, content, ast_json, content_hash, file_size, now, now))
+
+                logger.info(f"Stored document {filename} with ID {document_id}")
+                return document_id
+
+        except sqlite3.IntegrityError as e:
+            if "UNIQUE constraint failed" in str(e):
+                raise DuplicateResourceError("Document", filename, context)
+            else:
+                raise DatabaseError(f"Integrity error storing document {filename}", e, context)
+
+        except Exception as e:
+            logger.error(f"Error storing document {filename}: {e}")
+            raise TransactionError(f"store document {filename}", e, context)
+
+    async def get_document(
+        self,
+        document_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> Dict[str, Any]:
+        """Retrieve a document by its ID."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_document_{document_id}",
+                operation_type=OperationType.READ,
+                resource_type="Document",
+                resource_id=document_id
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            cursor = conn.execute("""
+                SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
+                FROM documents
+                WHERE id = ?
+            """, (document_id,))
+
+            row = cursor.fetchone()
+
+            if not row:
+                raise ResourceNotFoundError("Document", document_id, context)
+
+            # Parse the row data
+            return {
+                "id": row[0],
+                "filename": row[1],
+                "content": row[2],
+                "ast": json.loads(row[3]),
+                "content_hash": row[4],
+                "file_size": row[5],
+                "created_at": row[6],
+                "updated_at": row[7]
+            }
+
+        except ResourceNotFoundError:
+            # Re-raise ResourceNotFoundError as-is
+            raise
+
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse AST JSON for document {document_id}: {e}")
+            raise QueryError(
+                f"SELECT * FROM documents WHERE id = '{document_id}'",
+                {"document_id": document_id},
+                e,
+                context
+            )
+
+        except Exception as e:
+            logger.error(f"Error retrieving document {document_id}: {e}")
+            raise QueryError(
+                f"SELECT * FROM documents WHERE id = '{document_id}'",
+                {"document_id": document_id},
+                e,
+                context
+            )
+
+    async def get_documents(
+        self,
+        filename_pattern: Optional[str] = None,
+        limit: int = 100,
+        offset: int = 0,
+        context: Optional[ErrorContext] = None
+    ) -> List[Dict[str, Any]]:
+        """Retrieve multiple documents with filtering and pagination."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_documents_{filename_pattern or 'all'}",
+                operation_type=OperationType.READ,
+                resource_type="Document",
+                metadata={
+                    "filename_pattern": filename_pattern,
+                    "limit": limit,
+                    "offset": offset
+                }
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            # Build query based on filter
+            if filename_pattern:
+                query = """
+                    SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
+                    FROM documents
+                    WHERE filename LIKE ?
+                    ORDER BY created_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (f"%{filename_pattern}%", limit, offset)
+            else:
+                query = """
+                    SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
+                    FROM documents
+                    ORDER BY created_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (limit, offset)
+
+            cursor = conn.execute(query, params)
+            rows = cursor.fetchall()
+
+            documents = []
+            for row in rows:
+                try:
+                    document = {
+                        "id": row[0],
+                        "filename": row[1],
+                        "content": row[2],
+                        "ast": json.loads(row[3]),
+                        "content_hash": row[4],
+                        "file_size": row[5],
+                        "created_at": row[6],
+                        "updated_at": row[7]
+                    }
+                    documents.append(document)
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Skipping document {row[0]} due to invalid AST JSON: {e}")
+                    continue
+
+            return documents
+
+        except Exception as e:
+            logger.error(f"Error retrieving documents: {e}")
+            raise QueryError("SELECT documents with pagination", {"limit": limit, "offset": offset}, e, context)
+
+    async def update_document(
+        self,
+        document_id: str,
+        content: Optional[str] = None,
+        ast: Optional[Dict[str, Any]] = None,
+        context: Optional[ErrorContext] = None
+    ) -> Dict[str, Any]:
+        """Update an existing document."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"update_document_{document_id}",
+                operation_type=OperationType.UPDATE,
+                resource_type="Document",
+                resource_id=document_id,
+                request_data={
+                    "content_length": len(content) if content else None,
+                    "ast_keys": list(ast.keys()) if ast else None
+                }
+            )
+
+        try:
+            async with self.connection_manager.transaction() as conn:
+                # Check if document exists
+                cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
+                if not cursor.fetchone():
+                    raise ResourceNotFoundError("Document", document_id, context)
+
+                # Build update query
+                updates = []
+                params = []
+
+                if content is not None:
+                    # Recalculate content hash
+                    import hashlib
+                    content_hash = hashlib.sha256(content.encode()).hexdigest()
+                    file_size = len(content)
+
+                    updates.extend(["content = ?", "content_hash = ?", "file_size = ?"])
+                    params.extend([content, content_hash, file_size])
+
+                if ast is not None:
+                    ast_json = json.dumps(ast)
+                    updates.append("ast_json = ?")
+                    params.append(ast_json)
+
+                if not updates:
+                    # No changes to make
+                    return await self.get_document(document_id, context)
+
+                # Add updated timestamp
+                updates.append("updated_at = ?")
+                params.append(datetime.utcnow().isoformat())
+
+                # Add document_id for WHERE clause
+                params.append(document_id)
+
+                query = f"UPDATE documents SET {', '.join(updates)} WHERE id = ?"
+                conn.execute(query, params)
+
+                logger.info(f"Updated document {document_id}")
+
+                # Return updated document
+                return await self.get_document(document_id, context)
+
+        except Exception as e:
+            logger.error(f"Error updating document {document_id}: {e}")
+            raise TransactionError(f"update document {document_id}", e, context)
+
+    async def delete_document(
+        self,
+        document_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """Delete a document."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"delete_document_{document_id}",
+                operation_type=OperationType.DELETE,
+                resource_type="Document",
+                resource_id=document_id
+            )
+
+        try:
+            async with self.connection_manager.transaction() as conn:
+                # Check if document exists
+                cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
+                if not cursor.fetchone():
+                    raise ResourceNotFoundError("Document", document_id, context)
+
+                # Delete associated cache entries first (due to foreign key)
+                conn.execute("DELETE FROM ast_cache WHERE document_id = ?", (document_id,))
+
+                # Delete document
+                cursor = conn.execute("DELETE FROM documents WHERE id = ?", (document_id,))
+
+                deleted = cursor.rowcount > 0
+
+                if deleted:
+                    logger.info(f"Deleted document {document_id}")
+
+                return deleted
+
+        except Exception as e:
+            logger.error(f"Error deleting document {document_id}: {e}")
+            raise TransactionError(f"delete document {document_id}", e, context)
+
+    async def get_cache_path(
+        self,
+        document_id: str,
+        context: Optional[ErrorContext] = None
+    ) -> Path:
+        """Get the cache file path for a document."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"get_cache_path_{document_id}",
+                operation_type=OperationType.READ,
+                resource_type="CachePath",
+                resource_id=document_id
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            cursor = conn.execute("""
+                SELECT cache_path FROM ast_cache WHERE document_id = ?
+            """, (document_id,))
+
+            row = cursor.fetchone()
+
+            if not row:
+                raise ResourceNotFoundError("Cache", document_id, context)
+
+            return Path(row[0])
+
+        except Exception as e:
+            logger.error(f"Error getting cache path for document {document_id}: {e}")
+            raise QueryError(
+                f"SELECT cache_path FROM ast_cache WHERE document_id = '{document_id}'",
+                {"document_id": document_id},
+                e,
+                context
+            )
+
+
+class SqliteCacheRepository(CacheRepository):
+    """
+    SQLite implementation of CacheRepository.
+
+    Provides efficient caching operations using SQLite as storage backend.
+    """
+
+    def __init__(self, connection_manager: ConnectionManager):
+        self.connection_manager = connection_manager
+        self._initialize_cache_schema()
+
+    def _initialize_cache_schema(self):
+        """Initialize database schema for cache operations."""
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            # Create cache entries table
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS cache_entries (
+                    key TEXT PRIMARY KEY,
+                    value_json TEXT NOT NULL,
+                    ttl_expires_at TIMESTAMP,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            """)
+
+            # Create index for TTL cleanup
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_ttl ON cache_entries(ttl_expires_at)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed ON cache_entries(accessed_at)")
+
+            conn.commit()
+            logger.info("Cache schema initialized successfully")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize cache schema: {e}")
+            raise ConnectionError("markitect.db", e)
+
+    async def get(
+        self,
+        key: str,
+        context: Optional[ErrorContext] = None
+    ) -> Optional[Any]:
+        """Retrieve a value from cache."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"cache_get_{key}",
+                operation_type=OperationType.READ,
+                resource_type="Cache",
+                resource_id=key
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            # Clean up expired entries first
+            await self._cleanup_expired_entries(conn)
+
+            cursor = conn.execute("""
+                SELECT value_json FROM cache_entries
+                WHERE key = ? AND (ttl_expires_at IS NULL OR ttl_expires_at > CURRENT_TIMESTAMP)
+            """, (key,))
+
+            row = cursor.fetchone()
+
+            if row:
+                # Update access time
+                conn.execute("""
+                    UPDATE cache_entries SET accessed_at = CURRENT_TIMESTAMP WHERE key = ?
+                """, (key,))
+                conn.commit()
+
+                return json.loads(row[0])
+
+            return None
+
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse cached value for key {key}: {e}")
+            # Remove corrupted cache entry
+            conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
+            conn.commit()
+            return None
+
+        except Exception as e:
+            logger.error(f"Error getting cache value for key {key}: {e}")
+            return None
+
+    async def set(
+        self,
+        key: str,
+        value: Any,
+        ttl: Optional[int] = None,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """Store a value in cache."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"cache_set_{key}",
+                operation_type=OperationType.WRITE,
+                resource_type="Cache",
+                resource_id=key,
+                request_data={"ttl": ttl}
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            # Calculate expiration time
+            expires_at = None
+            if ttl:
+                from datetime import timedelta
+                expires_at = (datetime.utcnow() + timedelta(seconds=ttl)).isoformat()
+
+            # Serialize value
+            value_json = json.dumps(value)
+
+            # Upsert cache entry
+            conn.execute("""
+                INSERT OR REPLACE INTO cache_entries (key, value_json, ttl_expires_at, created_at, accessed_at)
+                VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
+            """, (key, value_json, expires_at))
+
+            conn.commit()
+            return True
+
+        except Exception as e:
+            logger.error(f"Error setting cache value for key {key}: {e}")
+            return False
+
+    async def delete(
+        self,
+        key: str,
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """Delete a value from cache."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"cache_delete_{key}",
+                operation_type=OperationType.DELETE,
+                resource_type="Cache",
+                resource_id=key
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            cursor = conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
+            conn.commit()
+
+            return cursor.rowcount > 0
+
+        except Exception as e:
+            logger.error(f"Error deleting cache value for key {key}: {e}")
+            return False
+
+    async def invalidate_pattern(
+        self,
+        pattern: str,
+        context: Optional[ErrorContext] = None
+    ) -> int:
+        """Invalidate cache entries matching a pattern."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"cache_invalidate_{pattern}",
+                operation_type=OperationType.DELETE,
+                resource_type="Cache",
+                metadata={"pattern": pattern}
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            # Convert pattern to SQL LIKE pattern
+            sql_pattern = pattern.replace("*", "%")
+
+            cursor = conn.execute("DELETE FROM cache_entries WHERE key LIKE ?", (sql_pattern,))
+            conn.commit()
+
+            deleted_count = cursor.rowcount
+            logger.info(f"Invalidated {deleted_count} cache entries matching pattern '{pattern}'")
+
+            return deleted_count
+
+        except Exception as e:
+            logger.error(f"Error invalidating cache pattern {pattern}: {e}")
+            raise QueryError(f"DELETE FROM cache_entries WHERE key LIKE '{pattern}'", {"pattern": pattern}, e, context)
+
+    async def store_ast_cache(
+        self,
+        document_id: str,
+        ast: Dict[str, Any],
+        context: Optional[ErrorContext] = None
+    ) -> bool:
+        """Store AST cache for a document."""
+        if context is None:
+            context = ErrorContext(
+                operation_id=f"store_ast_cache_{document_id}",
+                operation_type=OperationType.WRITE,
+                resource_type="ASTCache",
+                resource_id=document_id
+            )
+
+        try:
+            conn = self.connection_manager.get_database_connection()
+
+            # Generate cache file path
+            cache_id = str(uuid.uuid4())
+            cache_path = f".cache/ast/{document_id}/{cache_id}.json"
+
+            # Create cache directory
+            cache_dir = Path(cache_path).parent
+            cache_dir.mkdir(parents=True, exist_ok=True)
+
+            # Write AST to cache file
+            with open(cache_path, 'w') as f:
+                json.dump(ast, f, indent=2)
+
+            cache_size = Path(cache_path).stat().st_size
+
+            # Store cache metadata in database
+            conn.execute("""
+                INSERT OR REPLACE INTO ast_cache (id, document_id, cache_path, cache_size, created_at, accessed_at)
+                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
+            """, (cache_id, document_id, cache_path, cache_size))
+
+            conn.commit()
+
+            logger.info(f"Stored AST cache for document {document_id} at {cache_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Error storing AST cache for document {document_id}: {e}")
+            return False
+
+    async def _cleanup_expired_entries(self, conn: sqlite3.Connection):
+        """Clean up expired cache entries."""
+        try:
+            cursor = conn.execute("DELETE FROM cache_entries WHERE ttl_expires_at < CURRENT_TIMESTAMP")
+            deleted_count = cursor.rowcount
+
+            if deleted_count > 0:
+                logger.debug(f"Cleaned up {deleted_count} expired cache entries")
+
+        except Exception as e:
+            logger.warning(f"Error cleaning up expired cache entries: {e}")