fix: Add missing infrastructure files from data access improvements
Add infrastructure components that were created during issue #24 but not properly committed: - Data access repositories and interfaces - Connection management infrastructure - Exception handling framework - Configuration management - Documentation from data access pattern improvements These files are essential infrastructure components that enable the repository pattern and improved data access strategies. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
495
infrastructure/repositories/filesystem_repository.py
Normal file
495
infrastructure/repositories/filesystem_repository.py
Normal file
@@ -0,0 +1,495 @@
|
||||
"""
|
||||
Filesystem repository implementation with atomic operations.
|
||||
|
||||
Provides reliable file operations with proper error handling,
|
||||
atomic writes, and workspace management.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
from infrastructure.logging import get_logger
|
||||
from typing import List, Optional
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from infrastructure.repositories.interfaces import WorkspaceRepository
|
||||
from infrastructure.exceptions import (
|
||||
ErrorContext, OperationType, ResourceNotFoundError,
|
||||
DuplicateResourceError, ValidationError
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class FilesystemWorkspaceRepository(WorkspaceRepository):
|
||||
"""
|
||||
Filesystem implementation of WorkspaceRepository.
|
||||
|
||||
Provides reliable workspace and file operations with atomic writes,
|
||||
proper validation, and comprehensive error handling.
|
||||
"""
|
||||
|
||||
def __init__(self, base_workspace_dir: str = ".markitect_workspace"):
|
||||
self.base_path = Path(base_workspace_dir).resolve()
|
||||
self.base_path.mkdir(parents=True, exist_ok=True)
|
||||
logger.info(f"Initialized workspace repository at {self.base_path}")
|
||||
|
||||
async def create_workspace(
|
||||
self,
|
||||
workspace_id: str,
|
||||
base_path: Path,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""Create a new workspace directory."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"create_workspace_{workspace_id}",
|
||||
operation_type=OperationType.WRITE,
|
||||
resource_type="Workspace",
|
||||
resource_id=workspace_id
|
||||
)
|
||||
|
||||
# Validate workspace ID
|
||||
if not self._is_valid_workspace_id(workspace_id):
|
||||
raise ValidationError(
|
||||
"workspace_id",
|
||||
workspace_id,
|
||||
"Workspace ID must be alphanumeric with optional dashes and underscores",
|
||||
context
|
||||
)
|
||||
|
||||
workspace_path = self.base_path / workspace_id
|
||||
|
||||
# Check if workspace already exists
|
||||
if workspace_path.exists():
|
||||
raise DuplicateResourceError("Workspace", workspace_id, context)
|
||||
|
||||
try:
|
||||
# Create workspace directory with proper permissions
|
||||
workspace_path.mkdir(parents=True, exist_ok=False, mode=0o755)
|
||||
|
||||
# Create standard subdirectories
|
||||
(workspace_path / "files").mkdir(exist_ok=True)
|
||||
(workspace_path / "temp").mkdir(exist_ok=True)
|
||||
(workspace_path / "logs").mkdir(exist_ok=True)
|
||||
|
||||
# Create workspace metadata file
|
||||
metadata = {
|
||||
"id": workspace_id,
|
||||
"created_at": datetime.utcnow().isoformat(),
|
||||
"version": "1.0",
|
||||
"type": "markitect_workspace"
|
||||
}
|
||||
|
||||
await self._write_json_file(
|
||||
workspace_path / ".workspace_meta.json",
|
||||
metadata,
|
||||
context
|
||||
)
|
||||
|
||||
logger.info(f"Created workspace: {workspace_id}")
|
||||
return workspace_path
|
||||
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to create workspace {workspace_id}: {e}")
|
||||
# Cleanup partial creation
|
||||
if workspace_path.exists():
|
||||
shutil.rmtree(workspace_path, ignore_errors=True)
|
||||
|
||||
raise self._map_os_error_to_exception(e, f"create workspace {workspace_id}", context)
|
||||
|
||||
async def get_workspace_path(
|
||||
self,
|
||||
workspace_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""Get the path to a workspace."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_workspace_path_{workspace_id}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Workspace",
|
||||
resource_id=workspace_id
|
||||
)
|
||||
|
||||
workspace_path = self.base_path / workspace_id
|
||||
|
||||
if not workspace_path.exists() or not workspace_path.is_dir():
|
||||
raise ResourceNotFoundError("Workspace", workspace_id, context)
|
||||
|
||||
return workspace_path
|
||||
|
||||
async def list_workspaces(
|
||||
self,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[str]:
|
||||
"""List all available workspaces."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id="list_workspaces",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Workspace"
|
||||
)
|
||||
|
||||
try:
|
||||
workspaces = []
|
||||
|
||||
if not self.base_path.exists():
|
||||
return workspaces
|
||||
|
||||
for item in self.base_path.iterdir():
|
||||
if item.is_dir() and self._is_valid_workspace_id(item.name):
|
||||
# Verify it's a valid workspace by checking for metadata
|
||||
metadata_file = item / ".workspace_meta.json"
|
||||
if metadata_file.exists():
|
||||
workspaces.append(item.name)
|
||||
|
||||
return sorted(workspaces)
|
||||
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to list workspaces: {e}")
|
||||
raise self._map_os_error_to_exception(e, "list workspaces", context)
|
||||
|
||||
async def write_file(
|
||||
self,
|
||||
workspace_id: str,
|
||||
file_path: str,
|
||||
content: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""Write content to a file in the workspace using atomic operations."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"write_file_{workspace_id}_{file_path}",
|
||||
operation_type=OperationType.WRITE,
|
||||
resource_type="WorkspaceFile",
|
||||
resource_id=f"{workspace_id}/{file_path}",
|
||||
request_data={"content_length": len(content)}
|
||||
)
|
||||
|
||||
# Validate inputs
|
||||
workspace_path = await self.get_workspace_path(workspace_id, context)
|
||||
|
||||
if not self._is_safe_file_path(file_path):
|
||||
raise ValidationError(
|
||||
"file_path",
|
||||
file_path,
|
||||
"File path contains invalid characters or attempts directory traversal",
|
||||
context
|
||||
)
|
||||
|
||||
# Validate file extension
|
||||
allowed_extensions = {".md", ".txt", ".py", ".js", ".json", ".yaml", ".yml", ".rst", ".csv"}
|
||||
file_ext = Path(file_path).suffix.lower()
|
||||
if file_ext and file_ext not in allowed_extensions:
|
||||
raise ValidationError(
|
||||
"file_path",
|
||||
file_path,
|
||||
f"File extension {file_ext} is not allowed",
|
||||
context
|
||||
)
|
||||
|
||||
# Validate content size (100MB limit)
|
||||
max_size = 100 * 1024 * 1024 # 100MB
|
||||
if len(content.encode('utf-8')) > max_size:
|
||||
raise ValidationError(
|
||||
"content",
|
||||
f"{len(content)} characters",
|
||||
f"File content exceeds maximum size of {max_size} bytes",
|
||||
context
|
||||
)
|
||||
|
||||
target_path = workspace_path / "files" / file_path
|
||||
|
||||
try:
|
||||
# Ensure parent directory exists
|
||||
target_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Atomic write using temporary file
|
||||
await self._atomic_write_file(target_path, content, context)
|
||||
|
||||
logger.info(f"Wrote file {file_path} in workspace {workspace_id}")
|
||||
return target_path
|
||||
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to write file {file_path} in workspace {workspace_id}: {e}")
|
||||
raise self._map_os_error_to_exception(e, f"write file {file_path}", context)
|
||||
|
||||
async def read_file(
|
||||
self,
|
||||
workspace_id: str,
|
||||
file_path: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> str:
|
||||
"""Read content from a file in the workspace."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"read_file_{workspace_id}_{file_path}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="WorkspaceFile",
|
||||
resource_id=f"{workspace_id}/{file_path}"
|
||||
)
|
||||
|
||||
# Validate inputs
|
||||
workspace_path = await self.get_workspace_path(workspace_id, context)
|
||||
|
||||
if not self._is_safe_file_path(file_path):
|
||||
raise ValidationError(
|
||||
"file_path",
|
||||
file_path,
|
||||
"File path contains invalid characters or attempts directory traversal",
|
||||
context
|
||||
)
|
||||
|
||||
target_path = workspace_path / "files" / file_path
|
||||
|
||||
if not target_path.exists():
|
||||
raise ResourceNotFoundError("File", f"{workspace_id}/{file_path}", context)
|
||||
|
||||
if not target_path.is_file():
|
||||
raise ValidationError(
|
||||
"file_path",
|
||||
file_path,
|
||||
"Path exists but is not a regular file",
|
||||
context
|
||||
)
|
||||
|
||||
try:
|
||||
# Read file with encoding detection
|
||||
content = target_path.read_text(encoding='utf-8')
|
||||
|
||||
logger.debug(f"Read file {file_path} from workspace {workspace_id}")
|
||||
return content
|
||||
|
||||
except UnicodeDecodeError as e:
|
||||
logger.error(f"Failed to decode file {file_path} as UTF-8: {e}")
|
||||
raise ValidationError(
|
||||
"file_content",
|
||||
"binary data",
|
||||
"File does not contain valid UTF-8 text",
|
||||
context
|
||||
)
|
||||
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to read file {file_path} from workspace {workspace_id}: {e}")
|
||||
raise self._map_os_error_to_exception(e, f"read file {file_path}", context)
|
||||
|
||||
async def delete_workspace(
|
||||
self,
|
||||
workspace_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""Delete a workspace and all its contents."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"delete_workspace_{workspace_id}",
|
||||
operation_type=OperationType.DELETE,
|
||||
resource_type="Workspace",
|
||||
resource_id=workspace_id
|
||||
)
|
||||
|
||||
workspace_path = await self.get_workspace_path(workspace_id, context)
|
||||
|
||||
try:
|
||||
# Use shutil.rmtree for recursive deletion
|
||||
shutil.rmtree(workspace_path)
|
||||
|
||||
logger.info(f"Deleted workspace: {workspace_id}")
|
||||
return True
|
||||
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to delete workspace {workspace_id}: {e}")
|
||||
raise self._map_os_error_to_exception(e, f"delete workspace {workspace_id}", context)
|
||||
|
||||
async def list_files(
|
||||
self,
|
||||
workspace_id: str,
|
||||
pattern: Optional[str] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[str]:
|
||||
"""List files in a workspace."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"list_files_{workspace_id}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="WorkspaceFile",
|
||||
metadata={"workspace_id": workspace_id, "pattern": pattern}
|
||||
)
|
||||
|
||||
workspace_path = await self.get_workspace_path(workspace_id, context)
|
||||
files_dir = workspace_path / "files"
|
||||
|
||||
if not files_dir.exists():
|
||||
return []
|
||||
|
||||
try:
|
||||
files = []
|
||||
|
||||
# Walk through all files in the workspace
|
||||
for item in files_dir.rglob("*"):
|
||||
if item.is_file():
|
||||
# Get relative path from files directory
|
||||
relative_path = str(item.relative_to(files_dir))
|
||||
|
||||
# Apply pattern filter if provided
|
||||
if pattern is None or self._matches_pattern(relative_path, pattern):
|
||||
files.append(relative_path)
|
||||
|
||||
return sorted(files)
|
||||
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to list files in workspace {workspace_id}: {e}")
|
||||
raise self._map_os_error_to_exception(e, f"list files in workspace {workspace_id}", context)
|
||||
|
||||
async def cleanup_old_workspaces(self, days_threshold: int = 30) -> int:
|
||||
"""Clean up workspaces older than specified days."""
|
||||
logger.info(f"Starting cleanup of workspaces older than {days_threshold} days")
|
||||
|
||||
try:
|
||||
cutoff_date = datetime.utcnow() - timedelta(days=days_threshold)
|
||||
deleted_count = 0
|
||||
|
||||
if not self.base_path.exists():
|
||||
return 0
|
||||
|
||||
for workspace_dir in self.base_path.iterdir():
|
||||
if not workspace_dir.is_dir():
|
||||
continue
|
||||
|
||||
try:
|
||||
# Check workspace metadata for creation date
|
||||
metadata_file = workspace_dir / ".workspace_meta.json"
|
||||
if not metadata_file.exists():
|
||||
continue
|
||||
|
||||
metadata = await self._read_json_file(metadata_file)
|
||||
created_at_str = metadata.get("created_at")
|
||||
|
||||
if not created_at_str:
|
||||
continue
|
||||
|
||||
created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
|
||||
|
||||
if created_at < cutoff_date:
|
||||
await self.delete_workspace(workspace_dir.name)
|
||||
deleted_count += 1
|
||||
logger.info(f"Cleaned up old workspace: {workspace_dir.name}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process workspace {workspace_dir.name} during cleanup: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"Cleanup completed: deleted {deleted_count} old workspaces")
|
||||
return deleted_count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during workspace cleanup: {e}")
|
||||
return 0
|
||||
|
||||
# Helper methods
|
||||
|
||||
def _is_valid_workspace_id(self, workspace_id: str) -> bool:
|
||||
"""Validate workspace ID format."""
|
||||
if not workspace_id or len(workspace_id) > 100:
|
||||
return False
|
||||
|
||||
# Allow alphanumeric, dash, underscore
|
||||
import re
|
||||
return re.match(r'^[a-zA-Z0-9_-]+$', workspace_id) is not None
|
||||
|
||||
def _is_safe_file_path(self, file_path: str) -> bool:
|
||||
"""Check if file path is safe (no directory traversal)."""
|
||||
if not file_path:
|
||||
return False
|
||||
|
||||
# Normalize path
|
||||
normalized = os.path.normpath(file_path)
|
||||
|
||||
# Check for directory traversal attempts
|
||||
if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized:
|
||||
return False
|
||||
|
||||
# Check for absolute paths
|
||||
if os.path.isabs(normalized):
|
||||
return False
|
||||
|
||||
# Check for unsafe characters
|
||||
unsafe_chars = {"<", ">", ":", "\"", "|", "?", "*", "\0"}
|
||||
if any(char in file_path for char in unsafe_chars):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
|
||||
"""Check if file path matches the given pattern."""
|
||||
import fnmatch
|
||||
return fnmatch.fnmatch(file_path.lower(), pattern.lower())
|
||||
|
||||
async def _atomic_write_file(self, target_path: Path, content: str, context: ErrorContext):
|
||||
"""Write file atomically using temporary file."""
|
||||
temp_dir = target_path.parent / ".tmp"
|
||||
temp_dir.mkdir(exist_ok=True)
|
||||
|
||||
# Create temporary file in same directory as target
|
||||
temp_fd, temp_path = tempfile.mkstemp(
|
||||
dir=temp_dir,
|
||||
prefix=f".tmp_{target_path.name}_",
|
||||
suffix=".tmp"
|
||||
)
|
||||
|
||||
try:
|
||||
# Write content to temporary file
|
||||
with os.fdopen(temp_fd, 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
f.flush()
|
||||
os.fsync(f.fileno()) # Ensure data is written to disk
|
||||
|
||||
# Atomic move to final location
|
||||
temp_path_obj = Path(temp_path)
|
||||
temp_path_obj.replace(target_path)
|
||||
|
||||
except Exception:
|
||||
# Clean up temporary file on error
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
finally:
|
||||
# Clean up temp directory if empty
|
||||
try:
|
||||
temp_dir.rmdir()
|
||||
except OSError:
|
||||
pass # Directory not empty or doesn't exist
|
||||
|
||||
async def _write_json_file(self, file_path: Path, data: dict, context: Optional[ErrorContext] = None):
|
||||
"""Write JSON data to file atomically."""
|
||||
import json
|
||||
json_content = json.dumps(data, indent=2)
|
||||
await self._atomic_write_file(file_path, json_content, context)
|
||||
|
||||
async def _read_json_file(self, file_path: Path) -> dict:
|
||||
"""Read JSON data from file."""
|
||||
import json
|
||||
content = file_path.read_text(encoding='utf-8')
|
||||
return json.loads(content)
|
||||
|
||||
def _map_os_error_to_exception(self, os_error: OSError, operation: str, context: ErrorContext):
|
||||
"""Map OS errors to appropriate domain exceptions."""
|
||||
from infrastructure.exceptions import (
|
||||
ResourceNotFoundError, ValidationError, DatabaseError
|
||||
)
|
||||
|
||||
if os_error.errno == 2: # No such file or directory
|
||||
return ResourceNotFoundError("File", operation, context)
|
||||
elif os_error.errno == 13: # Permission denied
|
||||
return ValidationError("permissions", operation, "Permission denied", context)
|
||||
elif os_error.errno == 28: # No space left on device
|
||||
return DatabaseError(f"Insufficient disk space for {operation}", os_error, context)
|
||||
elif os_error.errno == 17: # File exists
|
||||
return DuplicateResourceError("File", operation, context)
|
||||
else:
|
||||
return DatabaseError(f"Filesystem error during {operation}", os_error, context)
|
||||
618
infrastructure/repositories/gitea_repository.py
Normal file
618
infrastructure/repositories/gitea_repository.py
Normal file
@@ -0,0 +1,618 @@
|
||||
"""
|
||||
Gitea repository implementation with async HTTP client.
|
||||
|
||||
Provides high-performance, reliable access to Gitea API with connection pooling,
|
||||
retry mechanisms, and proper error handling.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from infrastructure.logging import get_logger
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
import aiohttp
|
||||
|
||||
from domain.issues.models import Issue, Label, IssueState
|
||||
from domain.projects.models import Project, Milestone, ProjectState
|
||||
from infrastructure.repositories.interfaces import IssueRepository, ProjectRepository
|
||||
from infrastructure.connection_manager import ConnectionManager, retry_with_backoff, RetryConfig
|
||||
from infrastructure.exceptions import (
|
||||
ErrorContext, OperationType, GiteaApiError, NetworkError,
|
||||
ResourceNotFoundError, ValidationError, ConcurrencyError
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class GiteaIssueRepository(IssueRepository):
|
||||
"""
|
||||
Gitea implementation of IssueRepository using async HTTP client.
|
||||
|
||||
Provides efficient access to Gitea issues API with connection pooling,
|
||||
automatic retries, and proper error handling.
|
||||
"""
|
||||
|
||||
def __init__(self, connection_manager: ConnectionManager, retry_config: Optional[RetryConfig] = None):
|
||||
self.connection_manager = connection_manager
|
||||
self.retry_config = retry_config or RetryConfig()
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def get_issue(self, issue_number: int, context: Optional[ErrorContext] = None) -> Issue:
|
||||
"""Retrieve an issue by its number from Gitea API."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_issue_{issue_number}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Issue",
|
||||
resource_id=str(issue_number)
|
||||
)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
async with session.get(f"/api/v1/repos/issues/{issue_number}") as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
return self._map_api_issue_to_domain(data)
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error getting issue {issue_number}: {e}")
|
||||
raise NetworkError(f"get issue {issue_number}", e, context)
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def get_issues(
|
||||
self,
|
||||
project_id: Optional[str] = None,
|
||||
state: Optional[str] = None,
|
||||
labels: Optional[List[str]] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Issue]:
|
||||
"""Retrieve multiple issues with filtering and pagination."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_issues_{project_id or 'all'}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Issue",
|
||||
metadata={
|
||||
"project_id": project_id,
|
||||
"state": state,
|
||||
"labels": labels,
|
||||
"limit": limit,
|
||||
"offset": offset
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
# Build query parameters
|
||||
params = {
|
||||
"limit": limit,
|
||||
"page": (offset // limit) + 1 # Gitea uses 1-based pagination
|
||||
}
|
||||
|
||||
if state:
|
||||
params["state"] = state
|
||||
|
||||
if labels:
|
||||
params["labels"] = ",".join(labels)
|
||||
|
||||
async with session.get("/api/v1/repos/issues", params=params) as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
return [self._map_api_issue_to_domain(issue_data) for issue_data in data]
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error getting issues: {e}")
|
||||
raise NetworkError("get issues", e, context)
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def create_issue(
|
||||
self,
|
||||
title: str,
|
||||
body: str,
|
||||
labels: Optional[List[str]] = None,
|
||||
assignees: Optional[List[str]] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Issue:
|
||||
"""Create a new issue via Gitea API."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"create_issue_{title[:50]}",
|
||||
operation_type=OperationType.WRITE,
|
||||
resource_type="Issue",
|
||||
request_data={
|
||||
"title": title,
|
||||
"body": body,
|
||||
"labels": labels,
|
||||
"assignees": assignees
|
||||
}
|
||||
)
|
||||
|
||||
# Validate input
|
||||
if not title or not title.strip():
|
||||
raise ValidationError("title", title, "Title cannot be empty", context)
|
||||
|
||||
if len(title) > 255:
|
||||
raise ValidationError("title", title, "Title cannot exceed 255 characters", context)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
# Prepare request payload
|
||||
payload = {
|
||||
"title": title.strip(),
|
||||
"body": body or ""
|
||||
}
|
||||
|
||||
if labels:
|
||||
payload["labels"] = labels
|
||||
|
||||
if assignees:
|
||||
payload["assignees"] = assignees
|
||||
|
||||
async with session.post("/api/v1/repos/issues", json=payload) as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
created_issue = self._map_api_issue_to_domain(data)
|
||||
|
||||
logger.info(f"Created issue #{created_issue.number}: {title}")
|
||||
return created_issue
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error creating issue '{title}': {e}")
|
||||
raise NetworkError(f"create issue '{title}'", e, context)
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def update_issue(
|
||||
self,
|
||||
issue_number: int,
|
||||
title: Optional[str] = None,
|
||||
body: Optional[str] = None,
|
||||
state: Optional[str] = None,
|
||||
labels: Optional[List[str]] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Issue:
|
||||
"""Update an existing issue via Gitea API."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"update_issue_{issue_number}",
|
||||
operation_type=OperationType.UPDATE,
|
||||
resource_type="Issue",
|
||||
resource_id=str(issue_number),
|
||||
request_data={
|
||||
"title": title,
|
||||
"body": body,
|
||||
"state": state,
|
||||
"labels": labels
|
||||
}
|
||||
)
|
||||
|
||||
# Validate input
|
||||
if title is not None:
|
||||
if not title.strip():
|
||||
raise ValidationError("title", title, "Title cannot be empty", context)
|
||||
if len(title) > 255:
|
||||
raise ValidationError("title", title, "Title cannot exceed 255 characters", context)
|
||||
|
||||
if state is not None and state not in ["open", "closed"]:
|
||||
raise ValidationError("state", state, "State must be 'open' or 'closed'", context)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
# First, get current issue to check for concurrent modifications
|
||||
current_issue = await self.get_issue(issue_number, context)
|
||||
|
||||
# Prepare update payload
|
||||
payload = {}
|
||||
|
||||
if title is not None:
|
||||
payload["title"] = title.strip()
|
||||
|
||||
if body is not None:
|
||||
payload["body"] = body
|
||||
|
||||
if state is not None:
|
||||
payload["state"] = state
|
||||
|
||||
if labels is not None:
|
||||
payload["labels"] = labels
|
||||
|
||||
# Only update if there are changes
|
||||
if not payload:
|
||||
return current_issue
|
||||
|
||||
async with session.patch(f"/api/v1/repos/issues/{issue_number}", json=payload) as response:
|
||||
# Handle potential concurrent modification
|
||||
if response.status == 409:
|
||||
raise ConcurrencyError("Issue", str(issue_number), context)
|
||||
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
updated_issue = self._map_api_issue_to_domain(data)
|
||||
|
||||
logger.info(f"Updated issue #{issue_number}")
|
||||
return updated_issue
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error updating issue {issue_number}: {e}")
|
||||
raise NetworkError(f"update issue {issue_number}", e, context)
|
||||
|
||||
async def get_issue_project_info(
|
||||
self,
|
||||
issue_number: int,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Get project-related information for an issue."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_issue_project_info_{issue_number}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="ProjectInfo",
|
||||
resource_id=str(issue_number)
|
||||
)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
# Get issue details first
|
||||
issue = await self.get_issue(issue_number, context)
|
||||
|
||||
# Get repository information
|
||||
async with session.get("/api/v1/repos") as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
repo_data = await response.json()
|
||||
|
||||
# Get project boards if available
|
||||
project_info = {
|
||||
"repository": repo_data,
|
||||
"kanban_columns": ["Todo", "In Progress", "Review", "Done"], # Default columns
|
||||
"issue": {
|
||||
"number": issue.number,
|
||||
"title": issue.title,
|
||||
"state": issue.state.value,
|
||||
"labels": [label.name for label in issue.labels]
|
||||
}
|
||||
}
|
||||
|
||||
# Try to get actual project boards
|
||||
try:
|
||||
async with session.get("/api/v1/repos/projects") as projects_response:
|
||||
if projects_response.status == 200:
|
||||
projects_data = await projects_response.json()
|
||||
if projects_data:
|
||||
# Use first project's columns if available
|
||||
project_info["projects"] = projects_data
|
||||
except Exception:
|
||||
# Projects API might not be available, use defaults
|
||||
pass
|
||||
|
||||
return project_info
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error getting project info for issue {issue_number}: {e}")
|
||||
raise NetworkError(f"get project info for issue {issue_number}", e, context)
|
||||
|
||||
def _map_api_issue_to_domain(self, api_data: Dict[str, Any]) -> Issue:
|
||||
"""Map Gitea API issue data to domain Issue object."""
|
||||
# Map labels
|
||||
labels = []
|
||||
if "labels" in api_data:
|
||||
for label_data in api_data["labels"]:
|
||||
label = Label(
|
||||
name=label_data["name"],
|
||||
color=label_data.get("color", ""),
|
||||
description=label_data.get("description", "")
|
||||
)
|
||||
labels.append(label)
|
||||
|
||||
# Map state
|
||||
state_value = api_data.get("state", "open")
|
||||
issue_state = IssueState.OPEN if state_value == "open" else IssueState.CLOSED
|
||||
|
||||
# Parse dates
|
||||
created_at = datetime.fromisoformat(api_data["created_at"].replace("Z", "+00:00"))
|
||||
updated_at = datetime.fromisoformat(api_data["updated_at"].replace("Z", "+00:00"))
|
||||
|
||||
closed_at = None
|
||||
if api_data.get("closed_at"):
|
||||
closed_at = datetime.fromisoformat(api_data["closed_at"].replace("Z", "+00:00"))
|
||||
|
||||
return Issue(
|
||||
number=api_data["number"],
|
||||
title=api_data["title"],
|
||||
body=api_data.get("body", ""),
|
||||
state=issue_state,
|
||||
labels=labels,
|
||||
assignees=api_data.get("assignees", []),
|
||||
author=api_data.get("user", {}).get("login", "unknown"),
|
||||
created_at=created_at,
|
||||
updated_at=updated_at,
|
||||
closed_at=closed_at,
|
||||
url=api_data.get("html_url", "")
|
||||
)
|
||||
|
||||
async def _handle_response_errors(self, response: aiohttp.ClientResponse, context: ErrorContext):
|
||||
"""Handle HTTP response errors and convert to appropriate exceptions."""
|
||||
if response.status == 200 or response.status == 201:
|
||||
return
|
||||
|
||||
response_text = await response.text()
|
||||
|
||||
if response.status == 404:
|
||||
resource_id = context.resource_id or "unknown"
|
||||
raise ResourceNotFoundError(context.resource_type, resource_id, context)
|
||||
|
||||
elif response.status == 401:
|
||||
raise GiteaApiError(
|
||||
response.status,
|
||||
"Authentication failed - check API token",
|
||||
str(response.url),
|
||||
context
|
||||
)
|
||||
|
||||
elif response.status == 403:
|
||||
raise GiteaApiError(
|
||||
response.status,
|
||||
"Access forbidden - check API permissions",
|
||||
str(response.url),
|
||||
context
|
||||
)
|
||||
|
||||
elif response.status == 409:
|
||||
# Conflict - usually concurrent modification
|
||||
raise ConcurrencyError(context.resource_type, context.resource_id or "unknown", context)
|
||||
|
||||
elif response.status == 422:
|
||||
# Validation error
|
||||
try:
|
||||
error_data = await response.json()
|
||||
error_message = error_data.get("message", response_text)
|
||||
except:
|
||||
error_message = response_text
|
||||
|
||||
raise ValidationError("request", None, error_message, context)
|
||||
|
||||
elif response.status >= 500:
|
||||
raise GiteaApiError(
|
||||
response.status,
|
||||
f"Server error: {response_text}",
|
||||
str(response.url),
|
||||
context
|
||||
)
|
||||
|
||||
else:
|
||||
raise GiteaApiError(
|
||||
response.status,
|
||||
response_text,
|
||||
str(response.url),
|
||||
context
|
||||
)
|
||||
|
||||
|
||||
class GiteaProjectRepository(ProjectRepository):
|
||||
"""
|
||||
Gitea implementation of ProjectRepository.
|
||||
|
||||
Provides access to project and milestone information via Gitea API.
|
||||
"""
|
||||
|
||||
def __init__(self, connection_manager: ConnectionManager, retry_config: Optional[RetryConfig] = None):
|
||||
self.connection_manager = connection_manager
|
||||
self.retry_config = retry_config or RetryConfig()
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def get_project(self, project_id: str, context: Optional[ErrorContext] = None) -> Project:
|
||||
"""Retrieve a project by its ID from Gitea API."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_project_{project_id}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Project",
|
||||
resource_id=project_id
|
||||
)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
async with session.get(f"/api/v1/repos/projects/{project_id}") as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
return self._map_api_project_to_domain(data)
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error getting project {project_id}: {e}")
|
||||
raise NetworkError(f"get project {project_id}", e, context)
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def get_projects(
|
||||
self,
|
||||
organization: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Project]:
|
||||
"""Retrieve multiple projects with pagination."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_projects_{organization or 'all'}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Project",
|
||||
metadata={
|
||||
"organization": organization,
|
||||
"limit": limit,
|
||||
"offset": offset
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
params = {
|
||||
"limit": limit,
|
||||
"page": (offset // limit) + 1
|
||||
}
|
||||
|
||||
endpoint = "/api/v1/repos/projects"
|
||||
if organization:
|
||||
endpoint = f"/api/v1/orgs/{organization}/projects"
|
||||
|
||||
async with session.get(endpoint, params=params) as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
return [self._map_api_project_to_domain(project_data) for project_data in data]
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error getting projects: {e}")
|
||||
raise NetworkError("get projects", e, context)
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def get_milestones(
|
||||
self,
|
||||
project_id: str,
|
||||
state: Optional[str] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Milestone]:
|
||||
"""Retrieve milestones for a project."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_milestones_{project_id}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Milestone",
|
||||
metadata={"project_id": project_id, "state": state}
|
||||
)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
params = {}
|
||||
if state:
|
||||
params["state"] = state
|
||||
|
||||
async with session.get(f"/api/v1/repos/milestones", params=params) as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
return [self._map_api_milestone_to_domain(milestone_data) for milestone_data in data]
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error getting milestones for project {project_id}: {e}")
|
||||
raise NetworkError(f"get milestones for project {project_id}", e, context)
|
||||
|
||||
@retry_with_backoff(RetryConfig())
|
||||
async def create_milestone(
|
||||
self,
|
||||
project_id: str,
|
||||
title: str,
|
||||
description: str,
|
||||
due_date: Optional[str] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Milestone:
|
||||
"""Create a new milestone for a project."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"create_milestone_{title[:50]}",
|
||||
operation_type=OperationType.WRITE,
|
||||
resource_type="Milestone",
|
||||
request_data={
|
||||
"project_id": project_id,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"due_date": due_date
|
||||
}
|
||||
)
|
||||
|
||||
# Validate input
|
||||
if not title or not title.strip():
|
||||
raise ValidationError("title", title, "Milestone title cannot be empty", context)
|
||||
|
||||
try:
|
||||
session = await self.connection_manager.get_http_session()
|
||||
|
||||
payload = {
|
||||
"title": title.strip(),
|
||||
"description": description or ""
|
||||
}
|
||||
|
||||
if due_date:
|
||||
payload["due_on"] = due_date
|
||||
|
||||
async with session.post("/api/v1/repos/milestones", json=payload) as response:
|
||||
await self._handle_response_errors(response, context)
|
||||
|
||||
data = await response.json()
|
||||
created_milestone = self._map_api_milestone_to_domain(data)
|
||||
|
||||
logger.info(f"Created milestone: {title}")
|
||||
return created_milestone
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
logger.error(f"Network error creating milestone '{title}': {e}")
|
||||
raise NetworkError(f"create milestone '{title}'", e, context)
|
||||
|
||||
def _map_api_project_to_domain(self, api_data: Dict[str, Any]) -> Project:
|
||||
"""Map Gitea API project data to domain Project object."""
|
||||
# For now, create a basic project since Gitea projects API might be limited
|
||||
created_at = datetime.fromisoformat(api_data.get("created_at", datetime.utcnow().isoformat()).replace("Z", "+00:00"))
|
||||
updated_at = datetime.fromisoformat(api_data.get("updated_at", datetime.utcnow().isoformat()).replace("Z", "+00:00"))
|
||||
|
||||
return Project(
|
||||
id=str(api_data.get("id", 0)),
|
||||
name=api_data.get("title", api_data.get("name", "Unknown Project")),
|
||||
description=api_data.get("body", api_data.get("description", "")),
|
||||
state=ProjectState.ACTIVE, # Default to active
|
||||
milestones=[], # Will be populated separately
|
||||
created_at=created_at,
|
||||
updated_at=updated_at
|
||||
)
|
||||
|
||||
def _map_api_milestone_to_domain(self, api_data: Dict[str, Any]) -> Milestone:
|
||||
"""Map Gitea API milestone data to domain Milestone object."""
|
||||
created_at = datetime.fromisoformat(api_data["created_at"].replace("Z", "+00:00"))
|
||||
updated_at = datetime.fromisoformat(api_data["updated_at"].replace("Z", "+00:00"))
|
||||
|
||||
due_date = None
|
||||
if api_data.get("due_on"):
|
||||
due_date = datetime.fromisoformat(api_data["due_on"].replace("Z", "+00:00"))
|
||||
|
||||
return Milestone(
|
||||
id=api_data["id"],
|
||||
title=api_data["title"],
|
||||
description=api_data.get("description", ""),
|
||||
state=api_data.get("state", "open"),
|
||||
open_issues=api_data.get("open_issues", 0),
|
||||
closed_issues=api_data.get("closed_issues", 0),
|
||||
due_date=due_date,
|
||||
created_at=created_at,
|
||||
updated_at=updated_at
|
||||
)
|
||||
|
||||
async def _handle_response_errors(self, response: aiohttp.ClientResponse, context: ErrorContext):
|
||||
"""Handle HTTP response errors and convert to appropriate exceptions."""
|
||||
# Reuse the same error handling logic from GiteaIssueRepository
|
||||
if response.status == 200 or response.status == 201:
|
||||
return
|
||||
|
||||
response_text = await response.text()
|
||||
|
||||
if response.status == 404:
|
||||
resource_id = context.resource_id or "unknown"
|
||||
raise ResourceNotFoundError(context.resource_type, resource_id, context)
|
||||
|
||||
elif response.status >= 400:
|
||||
raise GiteaApiError(
|
||||
response.status,
|
||||
response_text,
|
||||
str(response.url),
|
||||
context
|
||||
)
|
||||
680
infrastructure/repositories/interfaces.py
Normal file
680
infrastructure/repositories/interfaces.py
Normal file
@@ -0,0 +1,680 @@
|
||||
"""
|
||||
Abstract repository interfaces for data access patterns.
|
||||
|
||||
Defines the contracts for data access operations across different
|
||||
data sources, enabling clean separation between business logic
|
||||
and infrastructure concerns.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Optional, Dict, Any, AsyncContextManager
|
||||
from pathlib import Path
|
||||
|
||||
from domain.issues.models import Issue
|
||||
from domain.projects.models import Project, Milestone
|
||||
from infrastructure.exceptions import ErrorContext
|
||||
|
||||
|
||||
class IssueRepository(ABC):
|
||||
"""Abstract repository for issue-related operations."""
|
||||
|
||||
@abstractmethod
|
||||
async def get_issue(self, issue_number: int, context: Optional[ErrorContext] = None) -> Issue:
|
||||
"""
|
||||
Retrieve an issue by its number.
|
||||
|
||||
Args:
|
||||
issue_number: The issue number to retrieve
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Issue domain object
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If issue doesn't exist
|
||||
GiteaApiError: If API request fails
|
||||
NetworkError: If network connectivity fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_issues(
|
||||
self,
|
||||
project_id: Optional[str] = None,
|
||||
state: Optional[str] = None,
|
||||
labels: Optional[List[str]] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Issue]:
|
||||
"""
|
||||
Retrieve multiple issues with filtering and pagination.
|
||||
|
||||
Args:
|
||||
project_id: Filter by project ID
|
||||
state: Filter by issue state (open, closed)
|
||||
labels: Filter by labels
|
||||
limit: Maximum number of issues to return
|
||||
offset: Number of issues to skip
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
List of Issue domain objects
|
||||
|
||||
Raises:
|
||||
GiteaApiError: If API request fails
|
||||
NetworkError: If network connectivity fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def create_issue(
|
||||
self,
|
||||
title: str,
|
||||
body: str,
|
||||
labels: Optional[List[str]] = None,
|
||||
assignees: Optional[List[str]] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Issue:
|
||||
"""
|
||||
Create a new issue.
|
||||
|
||||
Args:
|
||||
title: Issue title
|
||||
body: Issue description
|
||||
labels: List of label names
|
||||
assignees: List of assignee usernames
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Created Issue domain object
|
||||
|
||||
Raises:
|
||||
ValidationError: If input data is invalid
|
||||
GiteaApiError: If API request fails
|
||||
NetworkError: If network connectivity fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def update_issue(
|
||||
self,
|
||||
issue_number: int,
|
||||
title: Optional[str] = None,
|
||||
body: Optional[str] = None,
|
||||
state: Optional[str] = None,
|
||||
labels: Optional[List[str]] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Issue:
|
||||
"""
|
||||
Update an existing issue.
|
||||
|
||||
Args:
|
||||
issue_number: Issue number to update
|
||||
title: New title (if provided)
|
||||
body: New body (if provided)
|
||||
state: New state (if provided)
|
||||
labels: New labels (if provided)
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Updated Issue domain object
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If issue doesn't exist
|
||||
ValidationError: If input data is invalid
|
||||
GiteaApiError: If API request fails
|
||||
ConcurrencyError: If issue was modified concurrently
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_issue_project_info(
|
||||
self,
|
||||
issue_number: int,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Get project-related information for an issue.
|
||||
|
||||
Args:
|
||||
issue_number: Issue number
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Project information dictionary
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If issue doesn't exist
|
||||
GiteaApiError: If API request fails
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class ProjectRepository(ABC):
|
||||
"""Abstract repository for project-related operations."""
|
||||
|
||||
@abstractmethod
|
||||
async def get_project(self, project_id: str, context: Optional[ErrorContext] = None) -> Project:
|
||||
"""
|
||||
Retrieve a project by its ID.
|
||||
|
||||
Args:
|
||||
project_id: Project identifier
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Project domain object
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If project doesn't exist
|
||||
GiteaApiError: If API request fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_projects(
|
||||
self,
|
||||
organization: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Project]:
|
||||
"""
|
||||
Retrieve multiple projects with pagination.
|
||||
|
||||
Args:
|
||||
organization: Filter by organization
|
||||
limit: Maximum number of projects to return
|
||||
offset: Number of projects to skip
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
List of Project domain objects
|
||||
|
||||
Raises:
|
||||
GiteaApiError: If API request fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_milestones(
|
||||
self,
|
||||
project_id: str,
|
||||
state: Optional[str] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Milestone]:
|
||||
"""
|
||||
Retrieve milestones for a project.
|
||||
|
||||
Args:
|
||||
project_id: Project identifier
|
||||
state: Filter by milestone state
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
List of Milestone domain objects
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If project doesn't exist
|
||||
GiteaApiError: If API request fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def create_milestone(
|
||||
self,
|
||||
project_id: str,
|
||||
title: str,
|
||||
description: str,
|
||||
due_date: Optional[str] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Milestone:
|
||||
"""
|
||||
Create a new milestone for a project.
|
||||
|
||||
Args:
|
||||
project_id: Project identifier
|
||||
title: Milestone title
|
||||
description: Milestone description
|
||||
due_date: Due date (ISO format)
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Created Milestone domain object
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If project doesn't exist
|
||||
ValidationError: If input data is invalid
|
||||
GiteaApiError: If API request fails
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class DocumentRepository(ABC):
|
||||
"""Abstract repository for document storage and retrieval."""
|
||||
|
||||
@abstractmethod
|
||||
async def store_document(
|
||||
self,
|
||||
filename: str,
|
||||
content: str,
|
||||
ast: Dict[str, Any],
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> str:
|
||||
"""
|
||||
Store a document with its AST representation.
|
||||
|
||||
Args:
|
||||
filename: Document filename
|
||||
content: Document content
|
||||
ast: Parsed AST representation
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Document ID
|
||||
|
||||
Raises:
|
||||
ValidationError: If input data is invalid
|
||||
DatabaseError: If storage operation fails
|
||||
DuplicateResourceError: If document already exists
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_document(
|
||||
self,
|
||||
document_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Retrieve a document by its ID.
|
||||
|
||||
Args:
|
||||
document_id: Document identifier
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Document data dictionary
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If document doesn't exist
|
||||
DatabaseError: If retrieval operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_documents(
|
||||
self,
|
||||
filename_pattern: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve multiple documents with filtering and pagination.
|
||||
|
||||
Args:
|
||||
filename_pattern: Filter by filename pattern
|
||||
limit: Maximum number of documents to return
|
||||
offset: Number of documents to skip
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
List of document data dictionaries
|
||||
|
||||
Raises:
|
||||
DatabaseError: If retrieval operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def update_document(
|
||||
self,
|
||||
document_id: str,
|
||||
content: Optional[str] = None,
|
||||
ast: Optional[Dict[str, Any]] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Update an existing document.
|
||||
|
||||
Args:
|
||||
document_id: Document identifier
|
||||
content: New content (if provided)
|
||||
ast: New AST (if provided)
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Updated document data
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If document doesn't exist
|
||||
ValidationError: If input data is invalid
|
||||
DatabaseError: If update operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def delete_document(
|
||||
self,
|
||||
document_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Delete a document.
|
||||
|
||||
Args:
|
||||
document_id: Document identifier
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
True if document was deleted
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If document doesn't exist
|
||||
DatabaseError: If deletion operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_cache_path(
|
||||
self,
|
||||
document_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""
|
||||
Get the cache file path for a document.
|
||||
|
||||
Args:
|
||||
document_id: Document identifier
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Path to cache file
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If document doesn't exist
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class WorkspaceRepository(ABC):
|
||||
"""Abstract repository for workspace file operations."""
|
||||
|
||||
@abstractmethod
|
||||
async def create_workspace(
|
||||
self,
|
||||
workspace_id: str,
|
||||
base_path: Path,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""
|
||||
Create a new workspace directory.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
base_path: Base directory for workspaces
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Path to created workspace
|
||||
|
||||
Raises:
|
||||
DuplicateResourceError: If workspace already exists
|
||||
ValidationError: If paths are invalid
|
||||
FileSystemError: If directory creation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_workspace_path(
|
||||
self,
|
||||
workspace_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""
|
||||
Get the path to a workspace.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Path to workspace directory
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If workspace doesn't exist
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def list_workspaces(
|
||||
self,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[str]:
|
||||
"""
|
||||
List all available workspaces.
|
||||
|
||||
Args:
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
List of workspace identifiers
|
||||
|
||||
Raises:
|
||||
FileSystemError: If directory listing fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def write_file(
|
||||
self,
|
||||
workspace_id: str,
|
||||
file_path: str,
|
||||
content: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""
|
||||
Write content to a file in the workspace.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
file_path: Relative path within workspace
|
||||
content: File content
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Full path to written file
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If workspace doesn't exist
|
||||
ValidationError: If file path is invalid
|
||||
FileSystemError: If write operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def read_file(
|
||||
self,
|
||||
workspace_id: str,
|
||||
file_path: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> str:
|
||||
"""
|
||||
Read content from a file in the workspace.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
file_path: Relative path within workspace
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
File content
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If workspace or file doesn't exist
|
||||
FileSystemError: If read operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def delete_workspace(
|
||||
self,
|
||||
workspace_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Delete a workspace and all its contents.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
True if workspace was deleted
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If workspace doesn't exist
|
||||
FileSystemError: If deletion fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def list_files(
|
||||
self,
|
||||
workspace_id: str,
|
||||
pattern: Optional[str] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[str]:
|
||||
"""
|
||||
List files in a workspace.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
pattern: File pattern to match
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
List of relative file paths
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If workspace doesn't exist
|
||||
FileSystemError: If listing fails
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class CacheRepository(ABC):
|
||||
"""Abstract repository for caching operations."""
|
||||
|
||||
@abstractmethod
|
||||
async def get(
|
||||
self,
|
||||
key: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Optional[Any]:
|
||||
"""
|
||||
Retrieve a value from cache.
|
||||
|
||||
Args:
|
||||
key: Cache key
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Cached value or None if not found
|
||||
|
||||
Raises:
|
||||
CacheError: If cache operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def set(
|
||||
self,
|
||||
key: str,
|
||||
value: Any,
|
||||
ttl: Optional[int] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Store a value in cache.
|
||||
|
||||
Args:
|
||||
key: Cache key
|
||||
value: Value to cache
|
||||
ttl: Time to live in seconds
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
True if value was stored
|
||||
|
||||
Raises:
|
||||
CacheError: If cache operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def delete(
|
||||
self,
|
||||
key: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Delete a value from cache.
|
||||
|
||||
Args:
|
||||
key: Cache key
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
True if value was deleted
|
||||
|
||||
Raises:
|
||||
CacheError: If cache operation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def invalidate_pattern(
|
||||
self,
|
||||
pattern: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> int:
|
||||
"""
|
||||
Invalidate cache entries matching a pattern.
|
||||
|
||||
Args:
|
||||
pattern: Pattern to match (e.g., "user:*")
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
Number of invalidated entries
|
||||
|
||||
Raises:
|
||||
CacheInvalidationError: If invalidation fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def store_ast_cache(
|
||||
self,
|
||||
document_id: str,
|
||||
ast: Dict[str, Any],
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Store AST cache for a document.
|
||||
|
||||
Args:
|
||||
document_id: Document identifier
|
||||
ast: AST representation
|
||||
context: Error context for tracking operations
|
||||
|
||||
Returns:
|
||||
True if cache was stored
|
||||
|
||||
Raises:
|
||||
CacheError: If cache operation fails
|
||||
"""
|
||||
pass
|
||||
677
infrastructure/repositories/sqlite_repository.py
Normal file
677
infrastructure/repositories/sqlite_repository.py
Normal file
@@ -0,0 +1,677 @@
|
||||
"""
|
||||
SQLite repository implementation with transaction support.
|
||||
|
||||
Provides efficient database operations with connection pooling,
|
||||
transaction management, and proper error handling.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import uuid
|
||||
from infrastructure.logging import get_logger
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from infrastructure.repositories.interfaces import DocumentRepository, CacheRepository
|
||||
from infrastructure.connection_manager import ConnectionManager
|
||||
from infrastructure.exceptions import (
|
||||
ErrorContext, OperationType, DatabaseError, ConnectionError,
|
||||
ResourceNotFoundError, DuplicateResourceError, ValidationError,
|
||||
TransactionError, QueryError
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class SqliteDocumentRepository(DocumentRepository):
|
||||
"""
|
||||
SQLite implementation of DocumentRepository with transaction support.
|
||||
|
||||
Provides efficient document storage and retrieval with proper
|
||||
transaction handling and optimized database operations.
|
||||
"""
|
||||
|
||||
def __init__(self, connection_manager: ConnectionManager):
|
||||
self.connection_manager = connection_manager
|
||||
self._initialize_schema()
|
||||
|
||||
def _initialize_schema(self):
|
||||
"""Initialize database schema for documents."""
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
# Create documents table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS documents (
|
||||
id TEXT PRIMARY KEY,
|
||||
filename TEXT NOT NULL,
|
||||
content TEXT NOT NULL,
|
||||
ast_json TEXT NOT NULL,
|
||||
content_hash TEXT NOT NULL,
|
||||
file_size INTEGER NOT NULL,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
UNIQUE(filename, content_hash)
|
||||
)
|
||||
""")
|
||||
|
||||
# Create cache table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS ast_cache (
|
||||
id TEXT PRIMARY KEY,
|
||||
document_id TEXT NOT NULL,
|
||||
cache_path TEXT NOT NULL,
|
||||
cache_size INTEGER NOT NULL,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
|
||||
)
|
||||
""")
|
||||
|
||||
# Create indexes for performance
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_filename ON documents(filename)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_document_id ON ast_cache(document_id)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed_at ON ast_cache(accessed_at)")
|
||||
|
||||
conn.commit()
|
||||
logger.info("Database schema initialized successfully")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize database schema: {e}")
|
||||
raise ConnectionError("markitect.db", e)
|
||||
|
||||
async def store_document(
|
||||
self,
|
||||
filename: str,
|
||||
content: str,
|
||||
ast: Dict[str, Any],
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> str:
|
||||
"""Store a document with its AST representation."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"store_document_{filename}",
|
||||
operation_type=OperationType.WRITE,
|
||||
resource_type="Document",
|
||||
request_data={
|
||||
"filename": filename,
|
||||
"content_length": len(content),
|
||||
"ast_keys": list(ast.keys()) if ast else []
|
||||
}
|
||||
)
|
||||
|
||||
# Validate input
|
||||
if not filename or not filename.strip():
|
||||
raise ValidationError("filename", filename, "Filename cannot be empty", context)
|
||||
|
||||
if not content:
|
||||
raise ValidationError("content", content, "Content cannot be empty", context)
|
||||
|
||||
if not ast:
|
||||
raise ValidationError("ast", ast, "AST cannot be empty", context)
|
||||
|
||||
try:
|
||||
async with self.connection_manager.transaction() as conn:
|
||||
# Generate unique document ID
|
||||
document_id = str(uuid.uuid4())
|
||||
|
||||
# Calculate content hash for deduplication
|
||||
import hashlib
|
||||
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||
|
||||
# Check for duplicate content
|
||||
cursor = conn.execute(
|
||||
"SELECT id FROM documents WHERE filename = ? AND content_hash = ?",
|
||||
(filename, content_hash)
|
||||
)
|
||||
existing = cursor.fetchone()
|
||||
|
||||
if existing:
|
||||
raise DuplicateResourceError("Document", filename, context)
|
||||
|
||||
# Store document
|
||||
ast_json = json.dumps(ast)
|
||||
file_size = len(content)
|
||||
now = datetime.utcnow().isoformat()
|
||||
|
||||
conn.execute("""
|
||||
INSERT INTO documents (id, filename, content, ast_json, content_hash, file_size, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (document_id, filename, content, ast_json, content_hash, file_size, now, now))
|
||||
|
||||
logger.info(f"Stored document {filename} with ID {document_id}")
|
||||
return document_id
|
||||
|
||||
except sqlite3.IntegrityError as e:
|
||||
if "UNIQUE constraint failed" in str(e):
|
||||
raise DuplicateResourceError("Document", filename, context)
|
||||
else:
|
||||
raise DatabaseError(f"Integrity error storing document {filename}", e, context)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error storing document {filename}: {e}")
|
||||
raise TransactionError(f"store document {filename}", e, context)
|
||||
|
||||
async def get_document(
|
||||
self,
|
||||
document_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Retrieve a document by its ID."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_document_{document_id}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Document",
|
||||
resource_id=document_id
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
cursor = conn.execute("""
|
||||
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
|
||||
FROM documents
|
||||
WHERE id = ?
|
||||
""", (document_id,))
|
||||
|
||||
row = cursor.fetchone()
|
||||
|
||||
if not row:
|
||||
raise ResourceNotFoundError("Document", document_id, context)
|
||||
|
||||
# Parse the row data
|
||||
return {
|
||||
"id": row[0],
|
||||
"filename": row[1],
|
||||
"content": row[2],
|
||||
"ast": json.loads(row[3]),
|
||||
"content_hash": row[4],
|
||||
"file_size": row[5],
|
||||
"created_at": row[6],
|
||||
"updated_at": row[7]
|
||||
}
|
||||
|
||||
except ResourceNotFoundError:
|
||||
# Re-raise ResourceNotFoundError as-is
|
||||
raise
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse AST JSON for document {document_id}: {e}")
|
||||
raise QueryError(
|
||||
f"SELECT * FROM documents WHERE id = '{document_id}'",
|
||||
{"document_id": document_id},
|
||||
e,
|
||||
context
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrieving document {document_id}: {e}")
|
||||
raise QueryError(
|
||||
f"SELECT * FROM documents WHERE id = '{document_id}'",
|
||||
{"document_id": document_id},
|
||||
e,
|
||||
context
|
||||
)
|
||||
|
||||
async def get_documents(
|
||||
self,
|
||||
filename_pattern: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Retrieve multiple documents with filtering and pagination."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_documents_{filename_pattern or 'all'}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Document",
|
||||
metadata={
|
||||
"filename_pattern": filename_pattern,
|
||||
"limit": limit,
|
||||
"offset": offset
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
# Build query based on filter
|
||||
if filename_pattern:
|
||||
query = """
|
||||
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
|
||||
FROM documents
|
||||
WHERE filename LIKE ?
|
||||
ORDER BY created_at DESC
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
params = (f"%{filename_pattern}%", limit, offset)
|
||||
else:
|
||||
query = """
|
||||
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
|
||||
FROM documents
|
||||
ORDER BY created_at DESC
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
params = (limit, offset)
|
||||
|
||||
cursor = conn.execute(query, params)
|
||||
rows = cursor.fetchall()
|
||||
|
||||
documents = []
|
||||
for row in rows:
|
||||
try:
|
||||
document = {
|
||||
"id": row[0],
|
||||
"filename": row[1],
|
||||
"content": row[2],
|
||||
"ast": json.loads(row[3]),
|
||||
"content_hash": row[4],
|
||||
"file_size": row[5],
|
||||
"created_at": row[6],
|
||||
"updated_at": row[7]
|
||||
}
|
||||
documents.append(document)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Skipping document {row[0]} due to invalid AST JSON: {e}")
|
||||
continue
|
||||
|
||||
return documents
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrieving documents: {e}")
|
||||
raise QueryError("SELECT documents with pagination", {"limit": limit, "offset": offset}, e, context)
|
||||
|
||||
async def update_document(
|
||||
self,
|
||||
document_id: str,
|
||||
content: Optional[str] = None,
|
||||
ast: Optional[Dict[str, Any]] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Update an existing document."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"update_document_{document_id}",
|
||||
operation_type=OperationType.UPDATE,
|
||||
resource_type="Document",
|
||||
resource_id=document_id,
|
||||
request_data={
|
||||
"content_length": len(content) if content else None,
|
||||
"ast_keys": list(ast.keys()) if ast else None
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
async with self.connection_manager.transaction() as conn:
|
||||
# Check if document exists
|
||||
cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
|
||||
if not cursor.fetchone():
|
||||
raise ResourceNotFoundError("Document", document_id, context)
|
||||
|
||||
# Build update query
|
||||
updates = []
|
||||
params = []
|
||||
|
||||
if content is not None:
|
||||
# Recalculate content hash
|
||||
import hashlib
|
||||
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||
file_size = len(content)
|
||||
|
||||
updates.extend(["content = ?", "content_hash = ?", "file_size = ?"])
|
||||
params.extend([content, content_hash, file_size])
|
||||
|
||||
if ast is not None:
|
||||
ast_json = json.dumps(ast)
|
||||
updates.append("ast_json = ?")
|
||||
params.append(ast_json)
|
||||
|
||||
if not updates:
|
||||
# No changes to make
|
||||
return await self.get_document(document_id, context)
|
||||
|
||||
# Add updated timestamp
|
||||
updates.append("updated_at = ?")
|
||||
params.append(datetime.utcnow().isoformat())
|
||||
|
||||
# Add document_id for WHERE clause
|
||||
params.append(document_id)
|
||||
|
||||
query = f"UPDATE documents SET {', '.join(updates)} WHERE id = ?"
|
||||
conn.execute(query, params)
|
||||
|
||||
logger.info(f"Updated document {document_id}")
|
||||
|
||||
# Return updated document
|
||||
return await self.get_document(document_id, context)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating document {document_id}: {e}")
|
||||
raise TransactionError(f"update document {document_id}", e, context)
|
||||
|
||||
async def delete_document(
|
||||
self,
|
||||
document_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""Delete a document."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"delete_document_{document_id}",
|
||||
operation_type=OperationType.DELETE,
|
||||
resource_type="Document",
|
||||
resource_id=document_id
|
||||
)
|
||||
|
||||
try:
|
||||
async with self.connection_manager.transaction() as conn:
|
||||
# Check if document exists
|
||||
cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
|
||||
if not cursor.fetchone():
|
||||
raise ResourceNotFoundError("Document", document_id, context)
|
||||
|
||||
# Delete associated cache entries first (due to foreign key)
|
||||
conn.execute("DELETE FROM ast_cache WHERE document_id = ?", (document_id,))
|
||||
|
||||
# Delete document
|
||||
cursor = conn.execute("DELETE FROM documents WHERE id = ?", (document_id,))
|
||||
|
||||
deleted = cursor.rowcount > 0
|
||||
|
||||
if deleted:
|
||||
logger.info(f"Deleted document {document_id}")
|
||||
|
||||
return deleted
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting document {document_id}: {e}")
|
||||
raise TransactionError(f"delete document {document_id}", e, context)
|
||||
|
||||
async def get_cache_path(
|
||||
self,
|
||||
document_id: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Path:
|
||||
"""Get the cache file path for a document."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"get_cache_path_{document_id}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="CachePath",
|
||||
resource_id=document_id
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
cursor = conn.execute("""
|
||||
SELECT cache_path FROM ast_cache WHERE document_id = ?
|
||||
""", (document_id,))
|
||||
|
||||
row = cursor.fetchone()
|
||||
|
||||
if not row:
|
||||
raise ResourceNotFoundError("Cache", document_id, context)
|
||||
|
||||
return Path(row[0])
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting cache path for document {document_id}: {e}")
|
||||
raise QueryError(
|
||||
f"SELECT cache_path FROM ast_cache WHERE document_id = '{document_id}'",
|
||||
{"document_id": document_id},
|
||||
e,
|
||||
context
|
||||
)
|
||||
|
||||
|
||||
class SqliteCacheRepository(CacheRepository):
|
||||
"""
|
||||
SQLite implementation of CacheRepository.
|
||||
|
||||
Provides efficient caching operations using SQLite as storage backend.
|
||||
"""
|
||||
|
||||
def __init__(self, connection_manager: ConnectionManager):
|
||||
self.connection_manager = connection_manager
|
||||
self._initialize_cache_schema()
|
||||
|
||||
def _initialize_cache_schema(self):
|
||||
"""Initialize database schema for cache operations."""
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
# Create cache entries table
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS cache_entries (
|
||||
key TEXT PRIMARY KEY,
|
||||
value_json TEXT NOT NULL,
|
||||
ttl_expires_at TIMESTAMP,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
# Create index for TTL cleanup
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_ttl ON cache_entries(ttl_expires_at)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed ON cache_entries(accessed_at)")
|
||||
|
||||
conn.commit()
|
||||
logger.info("Cache schema initialized successfully")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize cache schema: {e}")
|
||||
raise ConnectionError("markitect.db", e)
|
||||
|
||||
async def get(
|
||||
self,
|
||||
key: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> Optional[Any]:
|
||||
"""Retrieve a value from cache."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"cache_get_{key}",
|
||||
operation_type=OperationType.READ,
|
||||
resource_type="Cache",
|
||||
resource_id=key
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
# Clean up expired entries first
|
||||
await self._cleanup_expired_entries(conn)
|
||||
|
||||
cursor = conn.execute("""
|
||||
SELECT value_json FROM cache_entries
|
||||
WHERE key = ? AND (ttl_expires_at IS NULL OR ttl_expires_at > CURRENT_TIMESTAMP)
|
||||
""", (key,))
|
||||
|
||||
row = cursor.fetchone()
|
||||
|
||||
if row:
|
||||
# Update access time
|
||||
conn.execute("""
|
||||
UPDATE cache_entries SET accessed_at = CURRENT_TIMESTAMP WHERE key = ?
|
||||
""", (key,))
|
||||
conn.commit()
|
||||
|
||||
return json.loads(row[0])
|
||||
|
||||
return None
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse cached value for key {key}: {e}")
|
||||
# Remove corrupted cache entry
|
||||
conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
|
||||
conn.commit()
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting cache value for key {key}: {e}")
|
||||
return None
|
||||
|
||||
async def set(
|
||||
self,
|
||||
key: str,
|
||||
value: Any,
|
||||
ttl: Optional[int] = None,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""Store a value in cache."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"cache_set_{key}",
|
||||
operation_type=OperationType.WRITE,
|
||||
resource_type="Cache",
|
||||
resource_id=key,
|
||||
request_data={"ttl": ttl}
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
# Calculate expiration time
|
||||
expires_at = None
|
||||
if ttl:
|
||||
from datetime import timedelta
|
||||
expires_at = (datetime.utcnow() + timedelta(seconds=ttl)).isoformat()
|
||||
|
||||
# Serialize value
|
||||
value_json = json.dumps(value)
|
||||
|
||||
# Upsert cache entry
|
||||
conn.execute("""
|
||||
INSERT OR REPLACE INTO cache_entries (key, value_json, ttl_expires_at, created_at, accessed_at)
|
||||
VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
||||
""", (key, value_json, expires_at))
|
||||
|
||||
conn.commit()
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error setting cache value for key {key}: {e}")
|
||||
return False
|
||||
|
||||
async def delete(
|
||||
self,
|
||||
key: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""Delete a value from cache."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"cache_delete_{key}",
|
||||
operation_type=OperationType.DELETE,
|
||||
resource_type="Cache",
|
||||
resource_id=key
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
cursor = conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
|
||||
conn.commit()
|
||||
|
||||
return cursor.rowcount > 0
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting cache value for key {key}: {e}")
|
||||
return False
|
||||
|
||||
async def invalidate_pattern(
|
||||
self,
|
||||
pattern: str,
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> int:
|
||||
"""Invalidate cache entries matching a pattern."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"cache_invalidate_{pattern}",
|
||||
operation_type=OperationType.DELETE,
|
||||
resource_type="Cache",
|
||||
metadata={"pattern": pattern}
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
# Convert pattern to SQL LIKE pattern
|
||||
sql_pattern = pattern.replace("*", "%")
|
||||
|
||||
cursor = conn.execute("DELETE FROM cache_entries WHERE key LIKE ?", (sql_pattern,))
|
||||
conn.commit()
|
||||
|
||||
deleted_count = cursor.rowcount
|
||||
logger.info(f"Invalidated {deleted_count} cache entries matching pattern '{pattern}'")
|
||||
|
||||
return deleted_count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error invalidating cache pattern {pattern}: {e}")
|
||||
raise QueryError(f"DELETE FROM cache_entries WHERE key LIKE '{pattern}'", {"pattern": pattern}, e, context)
|
||||
|
||||
async def store_ast_cache(
|
||||
self,
|
||||
document_id: str,
|
||||
ast: Dict[str, Any],
|
||||
context: Optional[ErrorContext] = None
|
||||
) -> bool:
|
||||
"""Store AST cache for a document."""
|
||||
if context is None:
|
||||
context = ErrorContext(
|
||||
operation_id=f"store_ast_cache_{document_id}",
|
||||
operation_type=OperationType.WRITE,
|
||||
resource_type="ASTCache",
|
||||
resource_id=document_id
|
||||
)
|
||||
|
||||
try:
|
||||
conn = self.connection_manager.get_database_connection()
|
||||
|
||||
# Generate cache file path
|
||||
cache_id = str(uuid.uuid4())
|
||||
cache_path = f".cache/ast/{document_id}/{cache_id}.json"
|
||||
|
||||
# Create cache directory
|
||||
cache_dir = Path(cache_path).parent
|
||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Write AST to cache file
|
||||
with open(cache_path, 'w') as f:
|
||||
json.dump(ast, f, indent=2)
|
||||
|
||||
cache_size = Path(cache_path).stat().st_size
|
||||
|
||||
# Store cache metadata in database
|
||||
conn.execute("""
|
||||
INSERT OR REPLACE INTO ast_cache (id, document_id, cache_path, cache_size, created_at, accessed_at)
|
||||
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
||||
""", (cache_id, document_id, cache_path, cache_size))
|
||||
|
||||
conn.commit()
|
||||
|
||||
logger.info(f"Stored AST cache for document {document_id} at {cache_path}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error storing AST cache for document {document_id}: {e}")
|
||||
return False
|
||||
|
||||
async def _cleanup_expired_entries(self, conn: sqlite3.Connection):
|
||||
"""Clean up expired cache entries."""
|
||||
try:
|
||||
cursor = conn.execute("DELETE FROM cache_entries WHERE ttl_expires_at < CURRENT_TIMESTAMP")
|
||||
deleted_count = cursor.rowcount
|
||||
|
||||
if deleted_count > 0:
|
||||
logger.debug(f"Cleaned up {deleted_count} expired cache entries")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error cleaning up expired cache entries: {e}")
|
||||
Reference in New Issue
Block a user