fix: Add missing infrastructure files from data access improvements

Add infrastructure components that were created during issue #24
but not properly committed:

- Data access repositories and interfaces
- Connection management infrastructure
- Exception handling framework
- Configuration management
- Documentation from data access pattern improvements

These files are essential infrastructure components that enable
the repository pattern and improved data access strategies.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-27 08:35:34 +02:00
parent 398c45d71c
commit f782ac1f69
8 changed files with 3819 additions and 0 deletions

View File

@@ -0,0 +1,495 @@
"""
Filesystem repository implementation with atomic operations.
Provides reliable file operations with proper error handling,
atomic writes, and workspace management.
"""
import os
import shutil
import tempfile
import uuid
from infrastructure.logging import get_logger
from typing import List, Optional
from pathlib import Path
from datetime import datetime, timedelta
from infrastructure.repositories.interfaces import WorkspaceRepository
from infrastructure.exceptions import (
ErrorContext, OperationType, ResourceNotFoundError,
DuplicateResourceError, ValidationError
)
logger = get_logger(__name__)
class FilesystemWorkspaceRepository(WorkspaceRepository):
"""
Filesystem implementation of WorkspaceRepository.
Provides reliable workspace and file operations with atomic writes,
proper validation, and comprehensive error handling.
"""
def __init__(self, base_workspace_dir: str = ".markitect_workspace"):
self.base_path = Path(base_workspace_dir).resolve()
self.base_path.mkdir(parents=True, exist_ok=True)
logger.info(f"Initialized workspace repository at {self.base_path}")
async def create_workspace(
self,
workspace_id: str,
base_path: Path,
context: Optional[ErrorContext] = None
) -> Path:
"""Create a new workspace directory."""
if context is None:
context = ErrorContext(
operation_id=f"create_workspace_{workspace_id}",
operation_type=OperationType.WRITE,
resource_type="Workspace",
resource_id=workspace_id
)
# Validate workspace ID
if not self._is_valid_workspace_id(workspace_id):
raise ValidationError(
"workspace_id",
workspace_id,
"Workspace ID must be alphanumeric with optional dashes and underscores",
context
)
workspace_path = self.base_path / workspace_id
# Check if workspace already exists
if workspace_path.exists():
raise DuplicateResourceError("Workspace", workspace_id, context)
try:
# Create workspace directory with proper permissions
workspace_path.mkdir(parents=True, exist_ok=False, mode=0o755)
# Create standard subdirectories
(workspace_path / "files").mkdir(exist_ok=True)
(workspace_path / "temp").mkdir(exist_ok=True)
(workspace_path / "logs").mkdir(exist_ok=True)
# Create workspace metadata file
metadata = {
"id": workspace_id,
"created_at": datetime.utcnow().isoformat(),
"version": "1.0",
"type": "markitect_workspace"
}
await self._write_json_file(
workspace_path / ".workspace_meta.json",
metadata,
context
)
logger.info(f"Created workspace: {workspace_id}")
return workspace_path
except OSError as e:
logger.error(f"Failed to create workspace {workspace_id}: {e}")
# Cleanup partial creation
if workspace_path.exists():
shutil.rmtree(workspace_path, ignore_errors=True)
raise self._map_os_error_to_exception(e, f"create workspace {workspace_id}", context)
async def get_workspace_path(
self,
workspace_id: str,
context: Optional[ErrorContext] = None
) -> Path:
"""Get the path to a workspace."""
if context is None:
context = ErrorContext(
operation_id=f"get_workspace_path_{workspace_id}",
operation_type=OperationType.READ,
resource_type="Workspace",
resource_id=workspace_id
)
workspace_path = self.base_path / workspace_id
if not workspace_path.exists() or not workspace_path.is_dir():
raise ResourceNotFoundError("Workspace", workspace_id, context)
return workspace_path
async def list_workspaces(
self,
context: Optional[ErrorContext] = None
) -> List[str]:
"""List all available workspaces."""
if context is None:
context = ErrorContext(
operation_id="list_workspaces",
operation_type=OperationType.READ,
resource_type="Workspace"
)
try:
workspaces = []
if not self.base_path.exists():
return workspaces
for item in self.base_path.iterdir():
if item.is_dir() and self._is_valid_workspace_id(item.name):
# Verify it's a valid workspace by checking for metadata
metadata_file = item / ".workspace_meta.json"
if metadata_file.exists():
workspaces.append(item.name)
return sorted(workspaces)
except OSError as e:
logger.error(f"Failed to list workspaces: {e}")
raise self._map_os_error_to_exception(e, "list workspaces", context)
async def write_file(
self,
workspace_id: str,
file_path: str,
content: str,
context: Optional[ErrorContext] = None
) -> Path:
"""Write content to a file in the workspace using atomic operations."""
if context is None:
context = ErrorContext(
operation_id=f"write_file_{workspace_id}_{file_path}",
operation_type=OperationType.WRITE,
resource_type="WorkspaceFile",
resource_id=f"{workspace_id}/{file_path}",
request_data={"content_length": len(content)}
)
# Validate inputs
workspace_path = await self.get_workspace_path(workspace_id, context)
if not self._is_safe_file_path(file_path):
raise ValidationError(
"file_path",
file_path,
"File path contains invalid characters or attempts directory traversal",
context
)
# Validate file extension
allowed_extensions = {".md", ".txt", ".py", ".js", ".json", ".yaml", ".yml", ".rst", ".csv"}
file_ext = Path(file_path).suffix.lower()
if file_ext and file_ext not in allowed_extensions:
raise ValidationError(
"file_path",
file_path,
f"File extension {file_ext} is not allowed",
context
)
# Validate content size (100MB limit)
max_size = 100 * 1024 * 1024 # 100MB
if len(content.encode('utf-8')) > max_size:
raise ValidationError(
"content",
f"{len(content)} characters",
f"File content exceeds maximum size of {max_size} bytes",
context
)
target_path = workspace_path / "files" / file_path
try:
# Ensure parent directory exists
target_path.parent.mkdir(parents=True, exist_ok=True)
# Atomic write using temporary file
await self._atomic_write_file(target_path, content, context)
logger.info(f"Wrote file {file_path} in workspace {workspace_id}")
return target_path
except OSError as e:
logger.error(f"Failed to write file {file_path} in workspace {workspace_id}: {e}")
raise self._map_os_error_to_exception(e, f"write file {file_path}", context)
async def read_file(
self,
workspace_id: str,
file_path: str,
context: Optional[ErrorContext] = None
) -> str:
"""Read content from a file in the workspace."""
if context is None:
context = ErrorContext(
operation_id=f"read_file_{workspace_id}_{file_path}",
operation_type=OperationType.READ,
resource_type="WorkspaceFile",
resource_id=f"{workspace_id}/{file_path}"
)
# Validate inputs
workspace_path = await self.get_workspace_path(workspace_id, context)
if not self._is_safe_file_path(file_path):
raise ValidationError(
"file_path",
file_path,
"File path contains invalid characters or attempts directory traversal",
context
)
target_path = workspace_path / "files" / file_path
if not target_path.exists():
raise ResourceNotFoundError("File", f"{workspace_id}/{file_path}", context)
if not target_path.is_file():
raise ValidationError(
"file_path",
file_path,
"Path exists but is not a regular file",
context
)
try:
# Read file with encoding detection
content = target_path.read_text(encoding='utf-8')
logger.debug(f"Read file {file_path} from workspace {workspace_id}")
return content
except UnicodeDecodeError as e:
logger.error(f"Failed to decode file {file_path} as UTF-8: {e}")
raise ValidationError(
"file_content",
"binary data",
"File does not contain valid UTF-8 text",
context
)
except OSError as e:
logger.error(f"Failed to read file {file_path} from workspace {workspace_id}: {e}")
raise self._map_os_error_to_exception(e, f"read file {file_path}", context)
async def delete_workspace(
self,
workspace_id: str,
context: Optional[ErrorContext] = None
) -> bool:
"""Delete a workspace and all its contents."""
if context is None:
context = ErrorContext(
operation_id=f"delete_workspace_{workspace_id}",
operation_type=OperationType.DELETE,
resource_type="Workspace",
resource_id=workspace_id
)
workspace_path = await self.get_workspace_path(workspace_id, context)
try:
# Use shutil.rmtree for recursive deletion
shutil.rmtree(workspace_path)
logger.info(f"Deleted workspace: {workspace_id}")
return True
except OSError as e:
logger.error(f"Failed to delete workspace {workspace_id}: {e}")
raise self._map_os_error_to_exception(e, f"delete workspace {workspace_id}", context)
async def list_files(
self,
workspace_id: str,
pattern: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> List[str]:
"""List files in a workspace."""
if context is None:
context = ErrorContext(
operation_id=f"list_files_{workspace_id}",
operation_type=OperationType.READ,
resource_type="WorkspaceFile",
metadata={"workspace_id": workspace_id, "pattern": pattern}
)
workspace_path = await self.get_workspace_path(workspace_id, context)
files_dir = workspace_path / "files"
if not files_dir.exists():
return []
try:
files = []
# Walk through all files in the workspace
for item in files_dir.rglob("*"):
if item.is_file():
# Get relative path from files directory
relative_path = str(item.relative_to(files_dir))
# Apply pattern filter if provided
if pattern is None or self._matches_pattern(relative_path, pattern):
files.append(relative_path)
return sorted(files)
except OSError as e:
logger.error(f"Failed to list files in workspace {workspace_id}: {e}")
raise self._map_os_error_to_exception(e, f"list files in workspace {workspace_id}", context)
async def cleanup_old_workspaces(self, days_threshold: int = 30) -> int:
"""Clean up workspaces older than specified days."""
logger.info(f"Starting cleanup of workspaces older than {days_threshold} days")
try:
cutoff_date = datetime.utcnow() - timedelta(days=days_threshold)
deleted_count = 0
if not self.base_path.exists():
return 0
for workspace_dir in self.base_path.iterdir():
if not workspace_dir.is_dir():
continue
try:
# Check workspace metadata for creation date
metadata_file = workspace_dir / ".workspace_meta.json"
if not metadata_file.exists():
continue
metadata = await self._read_json_file(metadata_file)
created_at_str = metadata.get("created_at")
if not created_at_str:
continue
created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
if created_at < cutoff_date:
await self.delete_workspace(workspace_dir.name)
deleted_count += 1
logger.info(f"Cleaned up old workspace: {workspace_dir.name}")
except Exception as e:
logger.warning(f"Failed to process workspace {workspace_dir.name} during cleanup: {e}")
continue
logger.info(f"Cleanup completed: deleted {deleted_count} old workspaces")
return deleted_count
except Exception as e:
logger.error(f"Error during workspace cleanup: {e}")
return 0
# Helper methods
def _is_valid_workspace_id(self, workspace_id: str) -> bool:
"""Validate workspace ID format."""
if not workspace_id or len(workspace_id) > 100:
return False
# Allow alphanumeric, dash, underscore
import re
return re.match(r'^[a-zA-Z0-9_-]+$', workspace_id) is not None
def _is_safe_file_path(self, file_path: str) -> bool:
"""Check if file path is safe (no directory traversal)."""
if not file_path:
return False
# Normalize path
normalized = os.path.normpath(file_path)
# Check for directory traversal attempts
if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized:
return False
# Check for absolute paths
if os.path.isabs(normalized):
return False
# Check for unsafe characters
unsafe_chars = {"<", ">", ":", "\"", "|", "?", "*", "\0"}
if any(char in file_path for char in unsafe_chars):
return False
return True
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
"""Check if file path matches the given pattern."""
import fnmatch
return fnmatch.fnmatch(file_path.lower(), pattern.lower())
async def _atomic_write_file(self, target_path: Path, content: str, context: ErrorContext):
"""Write file atomically using temporary file."""
temp_dir = target_path.parent / ".tmp"
temp_dir.mkdir(exist_ok=True)
# Create temporary file in same directory as target
temp_fd, temp_path = tempfile.mkstemp(
dir=temp_dir,
prefix=f".tmp_{target_path.name}_",
suffix=".tmp"
)
try:
# Write content to temporary file
with os.fdopen(temp_fd, 'w', encoding='utf-8') as f:
f.write(content)
f.flush()
os.fsync(f.fileno()) # Ensure data is written to disk
# Atomic move to final location
temp_path_obj = Path(temp_path)
temp_path_obj.replace(target_path)
except Exception:
# Clean up temporary file on error
try:
os.unlink(temp_path)
except OSError:
pass
raise
finally:
# Clean up temp directory if empty
try:
temp_dir.rmdir()
except OSError:
pass # Directory not empty or doesn't exist
async def _write_json_file(self, file_path: Path, data: dict, context: Optional[ErrorContext] = None):
"""Write JSON data to file atomically."""
import json
json_content = json.dumps(data, indent=2)
await self._atomic_write_file(file_path, json_content, context)
async def _read_json_file(self, file_path: Path) -> dict:
"""Read JSON data from file."""
import json
content = file_path.read_text(encoding='utf-8')
return json.loads(content)
def _map_os_error_to_exception(self, os_error: OSError, operation: str, context: ErrorContext):
"""Map OS errors to appropriate domain exceptions."""
from infrastructure.exceptions import (
ResourceNotFoundError, ValidationError, DatabaseError
)
if os_error.errno == 2: # No such file or directory
return ResourceNotFoundError("File", operation, context)
elif os_error.errno == 13: # Permission denied
return ValidationError("permissions", operation, "Permission denied", context)
elif os_error.errno == 28: # No space left on device
return DatabaseError(f"Insufficient disk space for {operation}", os_error, context)
elif os_error.errno == 17: # File exists
return DuplicateResourceError("File", operation, context)
else:
return DatabaseError(f"Filesystem error during {operation}", os_error, context)

View File

@@ -0,0 +1,618 @@
"""
Gitea repository implementation with async HTTP client.
Provides high-performance, reliable access to Gitea API with connection pooling,
retry mechanisms, and proper error handling.
"""
import asyncio
import json
from infrastructure.logging import get_logger
from typing import List, Optional, Dict, Any
from datetime import datetime
import aiohttp
from domain.issues.models import Issue, Label, IssueState
from domain.projects.models import Project, Milestone, ProjectState
from infrastructure.repositories.interfaces import IssueRepository, ProjectRepository
from infrastructure.connection_manager import ConnectionManager, retry_with_backoff, RetryConfig
from infrastructure.exceptions import (
ErrorContext, OperationType, GiteaApiError, NetworkError,
ResourceNotFoundError, ValidationError, ConcurrencyError
)
logger = get_logger(__name__)
class GiteaIssueRepository(IssueRepository):
"""
Gitea implementation of IssueRepository using async HTTP client.
Provides efficient access to Gitea issues API with connection pooling,
automatic retries, and proper error handling.
"""
def __init__(self, connection_manager: ConnectionManager, retry_config: Optional[RetryConfig] = None):
self.connection_manager = connection_manager
self.retry_config = retry_config or RetryConfig()
@retry_with_backoff(RetryConfig())
async def get_issue(self, issue_number: int, context: Optional[ErrorContext] = None) -> Issue:
"""Retrieve an issue by its number from Gitea API."""
if context is None:
context = ErrorContext(
operation_id=f"get_issue_{issue_number}",
operation_type=OperationType.READ,
resource_type="Issue",
resource_id=str(issue_number)
)
try:
session = await self.connection_manager.get_http_session()
async with session.get(f"/api/v1/repos/issues/{issue_number}") as response:
await self._handle_response_errors(response, context)
data = await response.json()
return self._map_api_issue_to_domain(data)
except aiohttp.ClientError as e:
logger.error(f"Network error getting issue {issue_number}: {e}")
raise NetworkError(f"get issue {issue_number}", e, context)
@retry_with_backoff(RetryConfig())
async def get_issues(
self,
project_id: Optional[str] = None,
state: Optional[str] = None,
labels: Optional[List[str]] = None,
limit: int = 100,
offset: int = 0,
context: Optional[ErrorContext] = None
) -> List[Issue]:
"""Retrieve multiple issues with filtering and pagination."""
if context is None:
context = ErrorContext(
operation_id=f"get_issues_{project_id or 'all'}",
operation_type=OperationType.READ,
resource_type="Issue",
metadata={
"project_id": project_id,
"state": state,
"labels": labels,
"limit": limit,
"offset": offset
}
)
try:
session = await self.connection_manager.get_http_session()
# Build query parameters
params = {
"limit": limit,
"page": (offset // limit) + 1 # Gitea uses 1-based pagination
}
if state:
params["state"] = state
if labels:
params["labels"] = ",".join(labels)
async with session.get("/api/v1/repos/issues", params=params) as response:
await self._handle_response_errors(response, context)
data = await response.json()
return [self._map_api_issue_to_domain(issue_data) for issue_data in data]
except aiohttp.ClientError as e:
logger.error(f"Network error getting issues: {e}")
raise NetworkError("get issues", e, context)
@retry_with_backoff(RetryConfig())
async def create_issue(
self,
title: str,
body: str,
labels: Optional[List[str]] = None,
assignees: Optional[List[str]] = None,
context: Optional[ErrorContext] = None
) -> Issue:
"""Create a new issue via Gitea API."""
if context is None:
context = ErrorContext(
operation_id=f"create_issue_{title[:50]}",
operation_type=OperationType.WRITE,
resource_type="Issue",
request_data={
"title": title,
"body": body,
"labels": labels,
"assignees": assignees
}
)
# Validate input
if not title or not title.strip():
raise ValidationError("title", title, "Title cannot be empty", context)
if len(title) > 255:
raise ValidationError("title", title, "Title cannot exceed 255 characters", context)
try:
session = await self.connection_manager.get_http_session()
# Prepare request payload
payload = {
"title": title.strip(),
"body": body or ""
}
if labels:
payload["labels"] = labels
if assignees:
payload["assignees"] = assignees
async with session.post("/api/v1/repos/issues", json=payload) as response:
await self._handle_response_errors(response, context)
data = await response.json()
created_issue = self._map_api_issue_to_domain(data)
logger.info(f"Created issue #{created_issue.number}: {title}")
return created_issue
except aiohttp.ClientError as e:
logger.error(f"Network error creating issue '{title}': {e}")
raise NetworkError(f"create issue '{title}'", e, context)
@retry_with_backoff(RetryConfig())
async def update_issue(
self,
issue_number: int,
title: Optional[str] = None,
body: Optional[str] = None,
state: Optional[str] = None,
labels: Optional[List[str]] = None,
context: Optional[ErrorContext] = None
) -> Issue:
"""Update an existing issue via Gitea API."""
if context is None:
context = ErrorContext(
operation_id=f"update_issue_{issue_number}",
operation_type=OperationType.UPDATE,
resource_type="Issue",
resource_id=str(issue_number),
request_data={
"title": title,
"body": body,
"state": state,
"labels": labels
}
)
# Validate input
if title is not None:
if not title.strip():
raise ValidationError("title", title, "Title cannot be empty", context)
if len(title) > 255:
raise ValidationError("title", title, "Title cannot exceed 255 characters", context)
if state is not None and state not in ["open", "closed"]:
raise ValidationError("state", state, "State must be 'open' or 'closed'", context)
try:
session = await self.connection_manager.get_http_session()
# First, get current issue to check for concurrent modifications
current_issue = await self.get_issue(issue_number, context)
# Prepare update payload
payload = {}
if title is not None:
payload["title"] = title.strip()
if body is not None:
payload["body"] = body
if state is not None:
payload["state"] = state
if labels is not None:
payload["labels"] = labels
# Only update if there are changes
if not payload:
return current_issue
async with session.patch(f"/api/v1/repos/issues/{issue_number}", json=payload) as response:
# Handle potential concurrent modification
if response.status == 409:
raise ConcurrencyError("Issue", str(issue_number), context)
await self._handle_response_errors(response, context)
data = await response.json()
updated_issue = self._map_api_issue_to_domain(data)
logger.info(f"Updated issue #{issue_number}")
return updated_issue
except aiohttp.ClientError as e:
logger.error(f"Network error updating issue {issue_number}: {e}")
raise NetworkError(f"update issue {issue_number}", e, context)
async def get_issue_project_info(
self,
issue_number: int,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""Get project-related information for an issue."""
if context is None:
context = ErrorContext(
operation_id=f"get_issue_project_info_{issue_number}",
operation_type=OperationType.READ,
resource_type="ProjectInfo",
resource_id=str(issue_number)
)
try:
session = await self.connection_manager.get_http_session()
# Get issue details first
issue = await self.get_issue(issue_number, context)
# Get repository information
async with session.get("/api/v1/repos") as response:
await self._handle_response_errors(response, context)
repo_data = await response.json()
# Get project boards if available
project_info = {
"repository": repo_data,
"kanban_columns": ["Todo", "In Progress", "Review", "Done"], # Default columns
"issue": {
"number": issue.number,
"title": issue.title,
"state": issue.state.value,
"labels": [label.name for label in issue.labels]
}
}
# Try to get actual project boards
try:
async with session.get("/api/v1/repos/projects") as projects_response:
if projects_response.status == 200:
projects_data = await projects_response.json()
if projects_data:
# Use first project's columns if available
project_info["projects"] = projects_data
except Exception:
# Projects API might not be available, use defaults
pass
return project_info
except aiohttp.ClientError as e:
logger.error(f"Network error getting project info for issue {issue_number}: {e}")
raise NetworkError(f"get project info for issue {issue_number}", e, context)
def _map_api_issue_to_domain(self, api_data: Dict[str, Any]) -> Issue:
"""Map Gitea API issue data to domain Issue object."""
# Map labels
labels = []
if "labels" in api_data:
for label_data in api_data["labels"]:
label = Label(
name=label_data["name"],
color=label_data.get("color", ""),
description=label_data.get("description", "")
)
labels.append(label)
# Map state
state_value = api_data.get("state", "open")
issue_state = IssueState.OPEN if state_value == "open" else IssueState.CLOSED
# Parse dates
created_at = datetime.fromisoformat(api_data["created_at"].replace("Z", "+00:00"))
updated_at = datetime.fromisoformat(api_data["updated_at"].replace("Z", "+00:00"))
closed_at = None
if api_data.get("closed_at"):
closed_at = datetime.fromisoformat(api_data["closed_at"].replace("Z", "+00:00"))
return Issue(
number=api_data["number"],
title=api_data["title"],
body=api_data.get("body", ""),
state=issue_state,
labels=labels,
assignees=api_data.get("assignees", []),
author=api_data.get("user", {}).get("login", "unknown"),
created_at=created_at,
updated_at=updated_at,
closed_at=closed_at,
url=api_data.get("html_url", "")
)
async def _handle_response_errors(self, response: aiohttp.ClientResponse, context: ErrorContext):
"""Handle HTTP response errors and convert to appropriate exceptions."""
if response.status == 200 or response.status == 201:
return
response_text = await response.text()
if response.status == 404:
resource_id = context.resource_id or "unknown"
raise ResourceNotFoundError(context.resource_type, resource_id, context)
elif response.status == 401:
raise GiteaApiError(
response.status,
"Authentication failed - check API token",
str(response.url),
context
)
elif response.status == 403:
raise GiteaApiError(
response.status,
"Access forbidden - check API permissions",
str(response.url),
context
)
elif response.status == 409:
# Conflict - usually concurrent modification
raise ConcurrencyError(context.resource_type, context.resource_id or "unknown", context)
elif response.status == 422:
# Validation error
try:
error_data = await response.json()
error_message = error_data.get("message", response_text)
except:
error_message = response_text
raise ValidationError("request", None, error_message, context)
elif response.status >= 500:
raise GiteaApiError(
response.status,
f"Server error: {response_text}",
str(response.url),
context
)
else:
raise GiteaApiError(
response.status,
response_text,
str(response.url),
context
)
class GiteaProjectRepository(ProjectRepository):
"""
Gitea implementation of ProjectRepository.
Provides access to project and milestone information via Gitea API.
"""
def __init__(self, connection_manager: ConnectionManager, retry_config: Optional[RetryConfig] = None):
self.connection_manager = connection_manager
self.retry_config = retry_config or RetryConfig()
@retry_with_backoff(RetryConfig())
async def get_project(self, project_id: str, context: Optional[ErrorContext] = None) -> Project:
"""Retrieve a project by its ID from Gitea API."""
if context is None:
context = ErrorContext(
operation_id=f"get_project_{project_id}",
operation_type=OperationType.READ,
resource_type="Project",
resource_id=project_id
)
try:
session = await self.connection_manager.get_http_session()
async with session.get(f"/api/v1/repos/projects/{project_id}") as response:
await self._handle_response_errors(response, context)
data = await response.json()
return self._map_api_project_to_domain(data)
except aiohttp.ClientError as e:
logger.error(f"Network error getting project {project_id}: {e}")
raise NetworkError(f"get project {project_id}", e, context)
@retry_with_backoff(RetryConfig())
async def get_projects(
self,
organization: Optional[str] = None,
limit: int = 100,
offset: int = 0,
context: Optional[ErrorContext] = None
) -> List[Project]:
"""Retrieve multiple projects with pagination."""
if context is None:
context = ErrorContext(
operation_id=f"get_projects_{organization or 'all'}",
operation_type=OperationType.READ,
resource_type="Project",
metadata={
"organization": organization,
"limit": limit,
"offset": offset
}
)
try:
session = await self.connection_manager.get_http_session()
params = {
"limit": limit,
"page": (offset // limit) + 1
}
endpoint = "/api/v1/repos/projects"
if organization:
endpoint = f"/api/v1/orgs/{organization}/projects"
async with session.get(endpoint, params=params) as response:
await self._handle_response_errors(response, context)
data = await response.json()
return [self._map_api_project_to_domain(project_data) for project_data in data]
except aiohttp.ClientError as e:
logger.error(f"Network error getting projects: {e}")
raise NetworkError("get projects", e, context)
@retry_with_backoff(RetryConfig())
async def get_milestones(
self,
project_id: str,
state: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> List[Milestone]:
"""Retrieve milestones for a project."""
if context is None:
context = ErrorContext(
operation_id=f"get_milestones_{project_id}",
operation_type=OperationType.READ,
resource_type="Milestone",
metadata={"project_id": project_id, "state": state}
)
try:
session = await self.connection_manager.get_http_session()
params = {}
if state:
params["state"] = state
async with session.get(f"/api/v1/repos/milestones", params=params) as response:
await self._handle_response_errors(response, context)
data = await response.json()
return [self._map_api_milestone_to_domain(milestone_data) for milestone_data in data]
except aiohttp.ClientError as e:
logger.error(f"Network error getting milestones for project {project_id}: {e}")
raise NetworkError(f"get milestones for project {project_id}", e, context)
@retry_with_backoff(RetryConfig())
async def create_milestone(
self,
project_id: str,
title: str,
description: str,
due_date: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> Milestone:
"""Create a new milestone for a project."""
if context is None:
context = ErrorContext(
operation_id=f"create_milestone_{title[:50]}",
operation_type=OperationType.WRITE,
resource_type="Milestone",
request_data={
"project_id": project_id,
"title": title,
"description": description,
"due_date": due_date
}
)
# Validate input
if not title or not title.strip():
raise ValidationError("title", title, "Milestone title cannot be empty", context)
try:
session = await self.connection_manager.get_http_session()
payload = {
"title": title.strip(),
"description": description or ""
}
if due_date:
payload["due_on"] = due_date
async with session.post("/api/v1/repos/milestones", json=payload) as response:
await self._handle_response_errors(response, context)
data = await response.json()
created_milestone = self._map_api_milestone_to_domain(data)
logger.info(f"Created milestone: {title}")
return created_milestone
except aiohttp.ClientError as e:
logger.error(f"Network error creating milestone '{title}': {e}")
raise NetworkError(f"create milestone '{title}'", e, context)
def _map_api_project_to_domain(self, api_data: Dict[str, Any]) -> Project:
"""Map Gitea API project data to domain Project object."""
# For now, create a basic project since Gitea projects API might be limited
created_at = datetime.fromisoformat(api_data.get("created_at", datetime.utcnow().isoformat()).replace("Z", "+00:00"))
updated_at = datetime.fromisoformat(api_data.get("updated_at", datetime.utcnow().isoformat()).replace("Z", "+00:00"))
return Project(
id=str(api_data.get("id", 0)),
name=api_data.get("title", api_data.get("name", "Unknown Project")),
description=api_data.get("body", api_data.get("description", "")),
state=ProjectState.ACTIVE, # Default to active
milestones=[], # Will be populated separately
created_at=created_at,
updated_at=updated_at
)
def _map_api_milestone_to_domain(self, api_data: Dict[str, Any]) -> Milestone:
"""Map Gitea API milestone data to domain Milestone object."""
created_at = datetime.fromisoformat(api_data["created_at"].replace("Z", "+00:00"))
updated_at = datetime.fromisoformat(api_data["updated_at"].replace("Z", "+00:00"))
due_date = None
if api_data.get("due_on"):
due_date = datetime.fromisoformat(api_data["due_on"].replace("Z", "+00:00"))
return Milestone(
id=api_data["id"],
title=api_data["title"],
description=api_data.get("description", ""),
state=api_data.get("state", "open"),
open_issues=api_data.get("open_issues", 0),
closed_issues=api_data.get("closed_issues", 0),
due_date=due_date,
created_at=created_at,
updated_at=updated_at
)
async def _handle_response_errors(self, response: aiohttp.ClientResponse, context: ErrorContext):
"""Handle HTTP response errors and convert to appropriate exceptions."""
# Reuse the same error handling logic from GiteaIssueRepository
if response.status == 200 or response.status == 201:
return
response_text = await response.text()
if response.status == 404:
resource_id = context.resource_id or "unknown"
raise ResourceNotFoundError(context.resource_type, resource_id, context)
elif response.status >= 400:
raise GiteaApiError(
response.status,
response_text,
str(response.url),
context
)

View File

@@ -0,0 +1,680 @@
"""
Abstract repository interfaces for data access patterns.
Defines the contracts for data access operations across different
data sources, enabling clean separation between business logic
and infrastructure concerns.
"""
from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any, AsyncContextManager
from pathlib import Path
from domain.issues.models import Issue
from domain.projects.models import Project, Milestone
from infrastructure.exceptions import ErrorContext
class IssueRepository(ABC):
"""Abstract repository for issue-related operations."""
@abstractmethod
async def get_issue(self, issue_number: int, context: Optional[ErrorContext] = None) -> Issue:
"""
Retrieve an issue by its number.
Args:
issue_number: The issue number to retrieve
context: Error context for tracking operations
Returns:
Issue domain object
Raises:
ResourceNotFoundError: If issue doesn't exist
GiteaApiError: If API request fails
NetworkError: If network connectivity fails
"""
pass
@abstractmethod
async def get_issues(
self,
project_id: Optional[str] = None,
state: Optional[str] = None,
labels: Optional[List[str]] = None,
limit: int = 100,
offset: int = 0,
context: Optional[ErrorContext] = None
) -> List[Issue]:
"""
Retrieve multiple issues with filtering and pagination.
Args:
project_id: Filter by project ID
state: Filter by issue state (open, closed)
labels: Filter by labels
limit: Maximum number of issues to return
offset: Number of issues to skip
context: Error context for tracking operations
Returns:
List of Issue domain objects
Raises:
GiteaApiError: If API request fails
NetworkError: If network connectivity fails
"""
pass
@abstractmethod
async def create_issue(
self,
title: str,
body: str,
labels: Optional[List[str]] = None,
assignees: Optional[List[str]] = None,
context: Optional[ErrorContext] = None
) -> Issue:
"""
Create a new issue.
Args:
title: Issue title
body: Issue description
labels: List of label names
assignees: List of assignee usernames
context: Error context for tracking operations
Returns:
Created Issue domain object
Raises:
ValidationError: If input data is invalid
GiteaApiError: If API request fails
NetworkError: If network connectivity fails
"""
pass
@abstractmethod
async def update_issue(
self,
issue_number: int,
title: Optional[str] = None,
body: Optional[str] = None,
state: Optional[str] = None,
labels: Optional[List[str]] = None,
context: Optional[ErrorContext] = None
) -> Issue:
"""
Update an existing issue.
Args:
issue_number: Issue number to update
title: New title (if provided)
body: New body (if provided)
state: New state (if provided)
labels: New labels (if provided)
context: Error context for tracking operations
Returns:
Updated Issue domain object
Raises:
ResourceNotFoundError: If issue doesn't exist
ValidationError: If input data is invalid
GiteaApiError: If API request fails
ConcurrencyError: If issue was modified concurrently
"""
pass
@abstractmethod
async def get_issue_project_info(
self,
issue_number: int,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""
Get project-related information for an issue.
Args:
issue_number: Issue number
context: Error context for tracking operations
Returns:
Project information dictionary
Raises:
ResourceNotFoundError: If issue doesn't exist
GiteaApiError: If API request fails
"""
pass
class ProjectRepository(ABC):
"""Abstract repository for project-related operations."""
@abstractmethod
async def get_project(self, project_id: str, context: Optional[ErrorContext] = None) -> Project:
"""
Retrieve a project by its ID.
Args:
project_id: Project identifier
context: Error context for tracking operations
Returns:
Project domain object
Raises:
ResourceNotFoundError: If project doesn't exist
GiteaApiError: If API request fails
"""
pass
@abstractmethod
async def get_projects(
self,
organization: Optional[str] = None,
limit: int = 100,
offset: int = 0,
context: Optional[ErrorContext] = None
) -> List[Project]:
"""
Retrieve multiple projects with pagination.
Args:
organization: Filter by organization
limit: Maximum number of projects to return
offset: Number of projects to skip
context: Error context for tracking operations
Returns:
List of Project domain objects
Raises:
GiteaApiError: If API request fails
"""
pass
@abstractmethod
async def get_milestones(
self,
project_id: str,
state: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> List[Milestone]:
"""
Retrieve milestones for a project.
Args:
project_id: Project identifier
state: Filter by milestone state
context: Error context for tracking operations
Returns:
List of Milestone domain objects
Raises:
ResourceNotFoundError: If project doesn't exist
GiteaApiError: If API request fails
"""
pass
@abstractmethod
async def create_milestone(
self,
project_id: str,
title: str,
description: str,
due_date: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> Milestone:
"""
Create a new milestone for a project.
Args:
project_id: Project identifier
title: Milestone title
description: Milestone description
due_date: Due date (ISO format)
context: Error context for tracking operations
Returns:
Created Milestone domain object
Raises:
ResourceNotFoundError: If project doesn't exist
ValidationError: If input data is invalid
GiteaApiError: If API request fails
"""
pass
class DocumentRepository(ABC):
"""Abstract repository for document storage and retrieval."""
@abstractmethod
async def store_document(
self,
filename: str,
content: str,
ast: Dict[str, Any],
context: Optional[ErrorContext] = None
) -> str:
"""
Store a document with its AST representation.
Args:
filename: Document filename
content: Document content
ast: Parsed AST representation
context: Error context for tracking operations
Returns:
Document ID
Raises:
ValidationError: If input data is invalid
DatabaseError: If storage operation fails
DuplicateResourceError: If document already exists
"""
pass
@abstractmethod
async def get_document(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""
Retrieve a document by its ID.
Args:
document_id: Document identifier
context: Error context for tracking operations
Returns:
Document data dictionary
Raises:
ResourceNotFoundError: If document doesn't exist
DatabaseError: If retrieval operation fails
"""
pass
@abstractmethod
async def get_documents(
self,
filename_pattern: Optional[str] = None,
limit: int = 100,
offset: int = 0,
context: Optional[ErrorContext] = None
) -> List[Dict[str, Any]]:
"""
Retrieve multiple documents with filtering and pagination.
Args:
filename_pattern: Filter by filename pattern
limit: Maximum number of documents to return
offset: Number of documents to skip
context: Error context for tracking operations
Returns:
List of document data dictionaries
Raises:
DatabaseError: If retrieval operation fails
"""
pass
@abstractmethod
async def update_document(
self,
document_id: str,
content: Optional[str] = None,
ast: Optional[Dict[str, Any]] = None,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""
Update an existing document.
Args:
document_id: Document identifier
content: New content (if provided)
ast: New AST (if provided)
context: Error context for tracking operations
Returns:
Updated document data
Raises:
ResourceNotFoundError: If document doesn't exist
ValidationError: If input data is invalid
DatabaseError: If update operation fails
"""
pass
@abstractmethod
async def delete_document(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> bool:
"""
Delete a document.
Args:
document_id: Document identifier
context: Error context for tracking operations
Returns:
True if document was deleted
Raises:
ResourceNotFoundError: If document doesn't exist
DatabaseError: If deletion operation fails
"""
pass
@abstractmethod
async def get_cache_path(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> Path:
"""
Get the cache file path for a document.
Args:
document_id: Document identifier
context: Error context for tracking operations
Returns:
Path to cache file
Raises:
ResourceNotFoundError: If document doesn't exist
"""
pass
class WorkspaceRepository(ABC):
"""Abstract repository for workspace file operations."""
@abstractmethod
async def create_workspace(
self,
workspace_id: str,
base_path: Path,
context: Optional[ErrorContext] = None
) -> Path:
"""
Create a new workspace directory.
Args:
workspace_id: Workspace identifier
base_path: Base directory for workspaces
context: Error context for tracking operations
Returns:
Path to created workspace
Raises:
DuplicateResourceError: If workspace already exists
ValidationError: If paths are invalid
FileSystemError: If directory creation fails
"""
pass
@abstractmethod
async def get_workspace_path(
self,
workspace_id: str,
context: Optional[ErrorContext] = None
) -> Path:
"""
Get the path to a workspace.
Args:
workspace_id: Workspace identifier
context: Error context for tracking operations
Returns:
Path to workspace directory
Raises:
ResourceNotFoundError: If workspace doesn't exist
"""
pass
@abstractmethod
async def list_workspaces(
self,
context: Optional[ErrorContext] = None
) -> List[str]:
"""
List all available workspaces.
Args:
context: Error context for tracking operations
Returns:
List of workspace identifiers
Raises:
FileSystemError: If directory listing fails
"""
pass
@abstractmethod
async def write_file(
self,
workspace_id: str,
file_path: str,
content: str,
context: Optional[ErrorContext] = None
) -> Path:
"""
Write content to a file in the workspace.
Args:
workspace_id: Workspace identifier
file_path: Relative path within workspace
content: File content
context: Error context for tracking operations
Returns:
Full path to written file
Raises:
ResourceNotFoundError: If workspace doesn't exist
ValidationError: If file path is invalid
FileSystemError: If write operation fails
"""
pass
@abstractmethod
async def read_file(
self,
workspace_id: str,
file_path: str,
context: Optional[ErrorContext] = None
) -> str:
"""
Read content from a file in the workspace.
Args:
workspace_id: Workspace identifier
file_path: Relative path within workspace
context: Error context for tracking operations
Returns:
File content
Raises:
ResourceNotFoundError: If workspace or file doesn't exist
FileSystemError: If read operation fails
"""
pass
@abstractmethod
async def delete_workspace(
self,
workspace_id: str,
context: Optional[ErrorContext] = None
) -> bool:
"""
Delete a workspace and all its contents.
Args:
workspace_id: Workspace identifier
context: Error context for tracking operations
Returns:
True if workspace was deleted
Raises:
ResourceNotFoundError: If workspace doesn't exist
FileSystemError: If deletion fails
"""
pass
@abstractmethod
async def list_files(
self,
workspace_id: str,
pattern: Optional[str] = None,
context: Optional[ErrorContext] = None
) -> List[str]:
"""
List files in a workspace.
Args:
workspace_id: Workspace identifier
pattern: File pattern to match
context: Error context for tracking operations
Returns:
List of relative file paths
Raises:
ResourceNotFoundError: If workspace doesn't exist
FileSystemError: If listing fails
"""
pass
class CacheRepository(ABC):
"""Abstract repository for caching operations."""
@abstractmethod
async def get(
self,
key: str,
context: Optional[ErrorContext] = None
) -> Optional[Any]:
"""
Retrieve a value from cache.
Args:
key: Cache key
context: Error context for tracking operations
Returns:
Cached value or None if not found
Raises:
CacheError: If cache operation fails
"""
pass
@abstractmethod
async def set(
self,
key: str,
value: Any,
ttl: Optional[int] = None,
context: Optional[ErrorContext] = None
) -> bool:
"""
Store a value in cache.
Args:
key: Cache key
value: Value to cache
ttl: Time to live in seconds
context: Error context for tracking operations
Returns:
True if value was stored
Raises:
CacheError: If cache operation fails
"""
pass
@abstractmethod
async def delete(
self,
key: str,
context: Optional[ErrorContext] = None
) -> bool:
"""
Delete a value from cache.
Args:
key: Cache key
context: Error context for tracking operations
Returns:
True if value was deleted
Raises:
CacheError: If cache operation fails
"""
pass
@abstractmethod
async def invalidate_pattern(
self,
pattern: str,
context: Optional[ErrorContext] = None
) -> int:
"""
Invalidate cache entries matching a pattern.
Args:
pattern: Pattern to match (e.g., "user:*")
context: Error context for tracking operations
Returns:
Number of invalidated entries
Raises:
CacheInvalidationError: If invalidation fails
"""
pass
@abstractmethod
async def store_ast_cache(
self,
document_id: str,
ast: Dict[str, Any],
context: Optional[ErrorContext] = None
) -> bool:
"""
Store AST cache for a document.
Args:
document_id: Document identifier
ast: AST representation
context: Error context for tracking operations
Returns:
True if cache was stored
Raises:
CacheError: If cache operation fails
"""
pass

View File

@@ -0,0 +1,677 @@
"""
SQLite repository implementation with transaction support.
Provides efficient database operations with connection pooling,
transaction management, and proper error handling.
"""
import sqlite3
import json
import uuid
from infrastructure.logging import get_logger
from typing import List, Optional, Dict, Any
from datetime import datetime
from pathlib import Path
from contextlib import asynccontextmanager
from infrastructure.repositories.interfaces import DocumentRepository, CacheRepository
from infrastructure.connection_manager import ConnectionManager
from infrastructure.exceptions import (
ErrorContext, OperationType, DatabaseError, ConnectionError,
ResourceNotFoundError, DuplicateResourceError, ValidationError,
TransactionError, QueryError
)
logger = get_logger(__name__)
class SqliteDocumentRepository(DocumentRepository):
"""
SQLite implementation of DocumentRepository with transaction support.
Provides efficient document storage and retrieval with proper
transaction handling and optimized database operations.
"""
def __init__(self, connection_manager: ConnectionManager):
self.connection_manager = connection_manager
self._initialize_schema()
def _initialize_schema(self):
"""Initialize database schema for documents."""
try:
conn = self.connection_manager.get_database_connection()
# Create documents table
conn.execute("""
CREATE TABLE IF NOT EXISTS documents (
id TEXT PRIMARY KEY,
filename TEXT NOT NULL,
content TEXT NOT NULL,
ast_json TEXT NOT NULL,
content_hash TEXT NOT NULL,
file_size INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(filename, content_hash)
)
""")
# Create cache table
conn.execute("""
CREATE TABLE IF NOT EXISTS ast_cache (
id TEXT PRIMARY KEY,
document_id TEXT NOT NULL,
cache_path TEXT NOT NULL,
cache_size INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
)
""")
# Create indexes for performance
conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_filename ON documents(filename)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_document_id ON ast_cache(document_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed_at ON ast_cache(accessed_at)")
conn.commit()
logger.info("Database schema initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize database schema: {e}")
raise ConnectionError("markitect.db", e)
async def store_document(
self,
filename: str,
content: str,
ast: Dict[str, Any],
context: Optional[ErrorContext] = None
) -> str:
"""Store a document with its AST representation."""
if context is None:
context = ErrorContext(
operation_id=f"store_document_{filename}",
operation_type=OperationType.WRITE,
resource_type="Document",
request_data={
"filename": filename,
"content_length": len(content),
"ast_keys": list(ast.keys()) if ast else []
}
)
# Validate input
if not filename or not filename.strip():
raise ValidationError("filename", filename, "Filename cannot be empty", context)
if not content:
raise ValidationError("content", content, "Content cannot be empty", context)
if not ast:
raise ValidationError("ast", ast, "AST cannot be empty", context)
try:
async with self.connection_manager.transaction() as conn:
# Generate unique document ID
document_id = str(uuid.uuid4())
# Calculate content hash for deduplication
import hashlib
content_hash = hashlib.sha256(content.encode()).hexdigest()
# Check for duplicate content
cursor = conn.execute(
"SELECT id FROM documents WHERE filename = ? AND content_hash = ?",
(filename, content_hash)
)
existing = cursor.fetchone()
if existing:
raise DuplicateResourceError("Document", filename, context)
# Store document
ast_json = json.dumps(ast)
file_size = len(content)
now = datetime.utcnow().isoformat()
conn.execute("""
INSERT INTO documents (id, filename, content, ast_json, content_hash, file_size, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (document_id, filename, content, ast_json, content_hash, file_size, now, now))
logger.info(f"Stored document {filename} with ID {document_id}")
return document_id
except sqlite3.IntegrityError as e:
if "UNIQUE constraint failed" in str(e):
raise DuplicateResourceError("Document", filename, context)
else:
raise DatabaseError(f"Integrity error storing document {filename}", e, context)
except Exception as e:
logger.error(f"Error storing document {filename}: {e}")
raise TransactionError(f"store document {filename}", e, context)
async def get_document(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""Retrieve a document by its ID."""
if context is None:
context = ErrorContext(
operation_id=f"get_document_{document_id}",
operation_type=OperationType.READ,
resource_type="Document",
resource_id=document_id
)
try:
conn = self.connection_manager.get_database_connection()
cursor = conn.execute("""
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
FROM documents
WHERE id = ?
""", (document_id,))
row = cursor.fetchone()
if not row:
raise ResourceNotFoundError("Document", document_id, context)
# Parse the row data
return {
"id": row[0],
"filename": row[1],
"content": row[2],
"ast": json.loads(row[3]),
"content_hash": row[4],
"file_size": row[5],
"created_at": row[6],
"updated_at": row[7]
}
except ResourceNotFoundError:
# Re-raise ResourceNotFoundError as-is
raise
except json.JSONDecodeError as e:
logger.error(f"Failed to parse AST JSON for document {document_id}: {e}")
raise QueryError(
f"SELECT * FROM documents WHERE id = '{document_id}'",
{"document_id": document_id},
e,
context
)
except Exception as e:
logger.error(f"Error retrieving document {document_id}: {e}")
raise QueryError(
f"SELECT * FROM documents WHERE id = '{document_id}'",
{"document_id": document_id},
e,
context
)
async def get_documents(
self,
filename_pattern: Optional[str] = None,
limit: int = 100,
offset: int = 0,
context: Optional[ErrorContext] = None
) -> List[Dict[str, Any]]:
"""Retrieve multiple documents with filtering and pagination."""
if context is None:
context = ErrorContext(
operation_id=f"get_documents_{filename_pattern or 'all'}",
operation_type=OperationType.READ,
resource_type="Document",
metadata={
"filename_pattern": filename_pattern,
"limit": limit,
"offset": offset
}
)
try:
conn = self.connection_manager.get_database_connection()
# Build query based on filter
if filename_pattern:
query = """
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
FROM documents
WHERE filename LIKE ?
ORDER BY created_at DESC
LIMIT ? OFFSET ?
"""
params = (f"%{filename_pattern}%", limit, offset)
else:
query = """
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
FROM documents
ORDER BY created_at DESC
LIMIT ? OFFSET ?
"""
params = (limit, offset)
cursor = conn.execute(query, params)
rows = cursor.fetchall()
documents = []
for row in rows:
try:
document = {
"id": row[0],
"filename": row[1],
"content": row[2],
"ast": json.loads(row[3]),
"content_hash": row[4],
"file_size": row[5],
"created_at": row[6],
"updated_at": row[7]
}
documents.append(document)
except json.JSONDecodeError as e:
logger.warning(f"Skipping document {row[0]} due to invalid AST JSON: {e}")
continue
return documents
except Exception as e:
logger.error(f"Error retrieving documents: {e}")
raise QueryError("SELECT documents with pagination", {"limit": limit, "offset": offset}, e, context)
async def update_document(
self,
document_id: str,
content: Optional[str] = None,
ast: Optional[Dict[str, Any]] = None,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""Update an existing document."""
if context is None:
context = ErrorContext(
operation_id=f"update_document_{document_id}",
operation_type=OperationType.UPDATE,
resource_type="Document",
resource_id=document_id,
request_data={
"content_length": len(content) if content else None,
"ast_keys": list(ast.keys()) if ast else None
}
)
try:
async with self.connection_manager.transaction() as conn:
# Check if document exists
cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
if not cursor.fetchone():
raise ResourceNotFoundError("Document", document_id, context)
# Build update query
updates = []
params = []
if content is not None:
# Recalculate content hash
import hashlib
content_hash = hashlib.sha256(content.encode()).hexdigest()
file_size = len(content)
updates.extend(["content = ?", "content_hash = ?", "file_size = ?"])
params.extend([content, content_hash, file_size])
if ast is not None:
ast_json = json.dumps(ast)
updates.append("ast_json = ?")
params.append(ast_json)
if not updates:
# No changes to make
return await self.get_document(document_id, context)
# Add updated timestamp
updates.append("updated_at = ?")
params.append(datetime.utcnow().isoformat())
# Add document_id for WHERE clause
params.append(document_id)
query = f"UPDATE documents SET {', '.join(updates)} WHERE id = ?"
conn.execute(query, params)
logger.info(f"Updated document {document_id}")
# Return updated document
return await self.get_document(document_id, context)
except Exception as e:
logger.error(f"Error updating document {document_id}: {e}")
raise TransactionError(f"update document {document_id}", e, context)
async def delete_document(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> bool:
"""Delete a document."""
if context is None:
context = ErrorContext(
operation_id=f"delete_document_{document_id}",
operation_type=OperationType.DELETE,
resource_type="Document",
resource_id=document_id
)
try:
async with self.connection_manager.transaction() as conn:
# Check if document exists
cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
if not cursor.fetchone():
raise ResourceNotFoundError("Document", document_id, context)
# Delete associated cache entries first (due to foreign key)
conn.execute("DELETE FROM ast_cache WHERE document_id = ?", (document_id,))
# Delete document
cursor = conn.execute("DELETE FROM documents WHERE id = ?", (document_id,))
deleted = cursor.rowcount > 0
if deleted:
logger.info(f"Deleted document {document_id}")
return deleted
except Exception as e:
logger.error(f"Error deleting document {document_id}: {e}")
raise TransactionError(f"delete document {document_id}", e, context)
async def get_cache_path(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> Path:
"""Get the cache file path for a document."""
if context is None:
context = ErrorContext(
operation_id=f"get_cache_path_{document_id}",
operation_type=OperationType.READ,
resource_type="CachePath",
resource_id=document_id
)
try:
conn = self.connection_manager.get_database_connection()
cursor = conn.execute("""
SELECT cache_path FROM ast_cache WHERE document_id = ?
""", (document_id,))
row = cursor.fetchone()
if not row:
raise ResourceNotFoundError("Cache", document_id, context)
return Path(row[0])
except Exception as e:
logger.error(f"Error getting cache path for document {document_id}: {e}")
raise QueryError(
f"SELECT cache_path FROM ast_cache WHERE document_id = '{document_id}'",
{"document_id": document_id},
e,
context
)
class SqliteCacheRepository(CacheRepository):
"""
SQLite implementation of CacheRepository.
Provides efficient caching operations using SQLite as storage backend.
"""
def __init__(self, connection_manager: ConnectionManager):
self.connection_manager = connection_manager
self._initialize_cache_schema()
def _initialize_cache_schema(self):
"""Initialize database schema for cache operations."""
try:
conn = self.connection_manager.get_database_connection()
# Create cache entries table
conn.execute("""
CREATE TABLE IF NOT EXISTS cache_entries (
key TEXT PRIMARY KEY,
value_json TEXT NOT NULL,
ttl_expires_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create index for TTL cleanup
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_ttl ON cache_entries(ttl_expires_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed ON cache_entries(accessed_at)")
conn.commit()
logger.info("Cache schema initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize cache schema: {e}")
raise ConnectionError("markitect.db", e)
async def get(
self,
key: str,
context: Optional[ErrorContext] = None
) -> Optional[Any]:
"""Retrieve a value from cache."""
if context is None:
context = ErrorContext(
operation_id=f"cache_get_{key}",
operation_type=OperationType.READ,
resource_type="Cache",
resource_id=key
)
try:
conn = self.connection_manager.get_database_connection()
# Clean up expired entries first
await self._cleanup_expired_entries(conn)
cursor = conn.execute("""
SELECT value_json FROM cache_entries
WHERE key = ? AND (ttl_expires_at IS NULL OR ttl_expires_at > CURRENT_TIMESTAMP)
""", (key,))
row = cursor.fetchone()
if row:
# Update access time
conn.execute("""
UPDATE cache_entries SET accessed_at = CURRENT_TIMESTAMP WHERE key = ?
""", (key,))
conn.commit()
return json.loads(row[0])
return None
except json.JSONDecodeError as e:
logger.error(f"Failed to parse cached value for key {key}: {e}")
# Remove corrupted cache entry
conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
conn.commit()
return None
except Exception as e:
logger.error(f"Error getting cache value for key {key}: {e}")
return None
async def set(
self,
key: str,
value: Any,
ttl: Optional[int] = None,
context: Optional[ErrorContext] = None
) -> bool:
"""Store a value in cache."""
if context is None:
context = ErrorContext(
operation_id=f"cache_set_{key}",
operation_type=OperationType.WRITE,
resource_type="Cache",
resource_id=key,
request_data={"ttl": ttl}
)
try:
conn = self.connection_manager.get_database_connection()
# Calculate expiration time
expires_at = None
if ttl:
from datetime import timedelta
expires_at = (datetime.utcnow() + timedelta(seconds=ttl)).isoformat()
# Serialize value
value_json = json.dumps(value)
# Upsert cache entry
conn.execute("""
INSERT OR REPLACE INTO cache_entries (key, value_json, ttl_expires_at, created_at, accessed_at)
VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
""", (key, value_json, expires_at))
conn.commit()
return True
except Exception as e:
logger.error(f"Error setting cache value for key {key}: {e}")
return False
async def delete(
self,
key: str,
context: Optional[ErrorContext] = None
) -> bool:
"""Delete a value from cache."""
if context is None:
context = ErrorContext(
operation_id=f"cache_delete_{key}",
operation_type=OperationType.DELETE,
resource_type="Cache",
resource_id=key
)
try:
conn = self.connection_manager.get_database_connection()
cursor = conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
conn.commit()
return cursor.rowcount > 0
except Exception as e:
logger.error(f"Error deleting cache value for key {key}: {e}")
return False
async def invalidate_pattern(
self,
pattern: str,
context: Optional[ErrorContext] = None
) -> int:
"""Invalidate cache entries matching a pattern."""
if context is None:
context = ErrorContext(
operation_id=f"cache_invalidate_{pattern}",
operation_type=OperationType.DELETE,
resource_type="Cache",
metadata={"pattern": pattern}
)
try:
conn = self.connection_manager.get_database_connection()
# Convert pattern to SQL LIKE pattern
sql_pattern = pattern.replace("*", "%")
cursor = conn.execute("DELETE FROM cache_entries WHERE key LIKE ?", (sql_pattern,))
conn.commit()
deleted_count = cursor.rowcount
logger.info(f"Invalidated {deleted_count} cache entries matching pattern '{pattern}'")
return deleted_count
except Exception as e:
logger.error(f"Error invalidating cache pattern {pattern}: {e}")
raise QueryError(f"DELETE FROM cache_entries WHERE key LIKE '{pattern}'", {"pattern": pattern}, e, context)
async def store_ast_cache(
self,
document_id: str,
ast: Dict[str, Any],
context: Optional[ErrorContext] = None
) -> bool:
"""Store AST cache for a document."""
if context is None:
context = ErrorContext(
operation_id=f"store_ast_cache_{document_id}",
operation_type=OperationType.WRITE,
resource_type="ASTCache",
resource_id=document_id
)
try:
conn = self.connection_manager.get_database_connection()
# Generate cache file path
cache_id = str(uuid.uuid4())
cache_path = f".cache/ast/{document_id}/{cache_id}.json"
# Create cache directory
cache_dir = Path(cache_path).parent
cache_dir.mkdir(parents=True, exist_ok=True)
# Write AST to cache file
with open(cache_path, 'w') as f:
json.dump(ast, f, indent=2)
cache_size = Path(cache_path).stat().st_size
# Store cache metadata in database
conn.execute("""
INSERT OR REPLACE INTO ast_cache (id, document_id, cache_path, cache_size, created_at, accessed_at)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
""", (cache_id, document_id, cache_path, cache_size))
conn.commit()
logger.info(f"Stored AST cache for document {document_id} at {cache_path}")
return True
except Exception as e:
logger.error(f"Error storing AST cache for document {document_id}: {e}")
return False
async def _cleanup_expired_entries(self, conn: sqlite3.Connection):
"""Clean up expired cache entries."""
try:
cursor = conn.execute("DELETE FROM cache_entries WHERE ttl_expires_at < CURRENT_TIMESTAMP")
deleted_count = cursor.rowcount
if deleted_count > 0:
logger.debug(f"Cleaned up {deleted_count} expired cache entries")
except Exception as e:
logger.warning(f"Error cleaning up expired cache entries: {e}")