fix: Add missing infrastructure files from data access improvements

Add infrastructure components that were created during issue #24
but not properly committed:

- Data access repositories and interfaces
- Connection management infrastructure
- Exception handling framework
- Configuration management
- Documentation from data access pattern improvements

These files are essential infrastructure components that enable
the repository pattern and improved data access strategies.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-27 08:35:34 +02:00
parent 398c45d71c
commit f782ac1f69
8 changed files with 3819 additions and 0 deletions

View File

@@ -0,0 +1,677 @@
"""
SQLite repository implementation with transaction support.
Provides efficient database operations with connection pooling,
transaction management, and proper error handling.
"""
import sqlite3
import json
import uuid
from infrastructure.logging import get_logger
from typing import List, Optional, Dict, Any
from datetime import datetime
from pathlib import Path
from contextlib import asynccontextmanager
from infrastructure.repositories.interfaces import DocumentRepository, CacheRepository
from infrastructure.connection_manager import ConnectionManager
from infrastructure.exceptions import (
ErrorContext, OperationType, DatabaseError, ConnectionError,
ResourceNotFoundError, DuplicateResourceError, ValidationError,
TransactionError, QueryError
)
logger = get_logger(__name__)
class SqliteDocumentRepository(DocumentRepository):
"""
SQLite implementation of DocumentRepository with transaction support.
Provides efficient document storage and retrieval with proper
transaction handling and optimized database operations.
"""
def __init__(self, connection_manager: ConnectionManager):
self.connection_manager = connection_manager
self._initialize_schema()
def _initialize_schema(self):
"""Initialize database schema for documents."""
try:
conn = self.connection_manager.get_database_connection()
# Create documents table
conn.execute("""
CREATE TABLE IF NOT EXISTS documents (
id TEXT PRIMARY KEY,
filename TEXT NOT NULL,
content TEXT NOT NULL,
ast_json TEXT NOT NULL,
content_hash TEXT NOT NULL,
file_size INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(filename, content_hash)
)
""")
# Create cache table
conn.execute("""
CREATE TABLE IF NOT EXISTS ast_cache (
id TEXT PRIMARY KEY,
document_id TEXT NOT NULL,
cache_path TEXT NOT NULL,
cache_size INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
)
""")
# Create indexes for performance
conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_filename ON documents(filename)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_document_id ON ast_cache(document_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed_at ON ast_cache(accessed_at)")
conn.commit()
logger.info("Database schema initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize database schema: {e}")
raise ConnectionError("markitect.db", e)
async def store_document(
self,
filename: str,
content: str,
ast: Dict[str, Any],
context: Optional[ErrorContext] = None
) -> str:
"""Store a document with its AST representation."""
if context is None:
context = ErrorContext(
operation_id=f"store_document_{filename}",
operation_type=OperationType.WRITE,
resource_type="Document",
request_data={
"filename": filename,
"content_length": len(content),
"ast_keys": list(ast.keys()) if ast else []
}
)
# Validate input
if not filename or not filename.strip():
raise ValidationError("filename", filename, "Filename cannot be empty", context)
if not content:
raise ValidationError("content", content, "Content cannot be empty", context)
if not ast:
raise ValidationError("ast", ast, "AST cannot be empty", context)
try:
async with self.connection_manager.transaction() as conn:
# Generate unique document ID
document_id = str(uuid.uuid4())
# Calculate content hash for deduplication
import hashlib
content_hash = hashlib.sha256(content.encode()).hexdigest()
# Check for duplicate content
cursor = conn.execute(
"SELECT id FROM documents WHERE filename = ? AND content_hash = ?",
(filename, content_hash)
)
existing = cursor.fetchone()
if existing:
raise DuplicateResourceError("Document", filename, context)
# Store document
ast_json = json.dumps(ast)
file_size = len(content)
now = datetime.utcnow().isoformat()
conn.execute("""
INSERT INTO documents (id, filename, content, ast_json, content_hash, file_size, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (document_id, filename, content, ast_json, content_hash, file_size, now, now))
logger.info(f"Stored document {filename} with ID {document_id}")
return document_id
except sqlite3.IntegrityError as e:
if "UNIQUE constraint failed" in str(e):
raise DuplicateResourceError("Document", filename, context)
else:
raise DatabaseError(f"Integrity error storing document {filename}", e, context)
except Exception as e:
logger.error(f"Error storing document {filename}: {e}")
raise TransactionError(f"store document {filename}", e, context)
async def get_document(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""Retrieve a document by its ID."""
if context is None:
context = ErrorContext(
operation_id=f"get_document_{document_id}",
operation_type=OperationType.READ,
resource_type="Document",
resource_id=document_id
)
try:
conn = self.connection_manager.get_database_connection()
cursor = conn.execute("""
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
FROM documents
WHERE id = ?
""", (document_id,))
row = cursor.fetchone()
if not row:
raise ResourceNotFoundError("Document", document_id, context)
# Parse the row data
return {
"id": row[0],
"filename": row[1],
"content": row[2],
"ast": json.loads(row[3]),
"content_hash": row[4],
"file_size": row[5],
"created_at": row[6],
"updated_at": row[7]
}
except ResourceNotFoundError:
# Re-raise ResourceNotFoundError as-is
raise
except json.JSONDecodeError as e:
logger.error(f"Failed to parse AST JSON for document {document_id}: {e}")
raise QueryError(
f"SELECT * FROM documents WHERE id = '{document_id}'",
{"document_id": document_id},
e,
context
)
except Exception as e:
logger.error(f"Error retrieving document {document_id}: {e}")
raise QueryError(
f"SELECT * FROM documents WHERE id = '{document_id}'",
{"document_id": document_id},
e,
context
)
async def get_documents(
self,
filename_pattern: Optional[str] = None,
limit: int = 100,
offset: int = 0,
context: Optional[ErrorContext] = None
) -> List[Dict[str, Any]]:
"""Retrieve multiple documents with filtering and pagination."""
if context is None:
context = ErrorContext(
operation_id=f"get_documents_{filename_pattern or 'all'}",
operation_type=OperationType.READ,
resource_type="Document",
metadata={
"filename_pattern": filename_pattern,
"limit": limit,
"offset": offset
}
)
try:
conn = self.connection_manager.get_database_connection()
# Build query based on filter
if filename_pattern:
query = """
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
FROM documents
WHERE filename LIKE ?
ORDER BY created_at DESC
LIMIT ? OFFSET ?
"""
params = (f"%{filename_pattern}%", limit, offset)
else:
query = """
SELECT id, filename, content, ast_json, content_hash, file_size, created_at, updated_at
FROM documents
ORDER BY created_at DESC
LIMIT ? OFFSET ?
"""
params = (limit, offset)
cursor = conn.execute(query, params)
rows = cursor.fetchall()
documents = []
for row in rows:
try:
document = {
"id": row[0],
"filename": row[1],
"content": row[2],
"ast": json.loads(row[3]),
"content_hash": row[4],
"file_size": row[5],
"created_at": row[6],
"updated_at": row[7]
}
documents.append(document)
except json.JSONDecodeError as e:
logger.warning(f"Skipping document {row[0]} due to invalid AST JSON: {e}")
continue
return documents
except Exception as e:
logger.error(f"Error retrieving documents: {e}")
raise QueryError("SELECT documents with pagination", {"limit": limit, "offset": offset}, e, context)
async def update_document(
self,
document_id: str,
content: Optional[str] = None,
ast: Optional[Dict[str, Any]] = None,
context: Optional[ErrorContext] = None
) -> Dict[str, Any]:
"""Update an existing document."""
if context is None:
context = ErrorContext(
operation_id=f"update_document_{document_id}",
operation_type=OperationType.UPDATE,
resource_type="Document",
resource_id=document_id,
request_data={
"content_length": len(content) if content else None,
"ast_keys": list(ast.keys()) if ast else None
}
)
try:
async with self.connection_manager.transaction() as conn:
# Check if document exists
cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
if not cursor.fetchone():
raise ResourceNotFoundError("Document", document_id, context)
# Build update query
updates = []
params = []
if content is not None:
# Recalculate content hash
import hashlib
content_hash = hashlib.sha256(content.encode()).hexdigest()
file_size = len(content)
updates.extend(["content = ?", "content_hash = ?", "file_size = ?"])
params.extend([content, content_hash, file_size])
if ast is not None:
ast_json = json.dumps(ast)
updates.append("ast_json = ?")
params.append(ast_json)
if not updates:
# No changes to make
return await self.get_document(document_id, context)
# Add updated timestamp
updates.append("updated_at = ?")
params.append(datetime.utcnow().isoformat())
# Add document_id for WHERE clause
params.append(document_id)
query = f"UPDATE documents SET {', '.join(updates)} WHERE id = ?"
conn.execute(query, params)
logger.info(f"Updated document {document_id}")
# Return updated document
return await self.get_document(document_id, context)
except Exception as e:
logger.error(f"Error updating document {document_id}: {e}")
raise TransactionError(f"update document {document_id}", e, context)
async def delete_document(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> bool:
"""Delete a document."""
if context is None:
context = ErrorContext(
operation_id=f"delete_document_{document_id}",
operation_type=OperationType.DELETE,
resource_type="Document",
resource_id=document_id
)
try:
async with self.connection_manager.transaction() as conn:
# Check if document exists
cursor = conn.execute("SELECT id FROM documents WHERE id = ?", (document_id,))
if not cursor.fetchone():
raise ResourceNotFoundError("Document", document_id, context)
# Delete associated cache entries first (due to foreign key)
conn.execute("DELETE FROM ast_cache WHERE document_id = ?", (document_id,))
# Delete document
cursor = conn.execute("DELETE FROM documents WHERE id = ?", (document_id,))
deleted = cursor.rowcount > 0
if deleted:
logger.info(f"Deleted document {document_id}")
return deleted
except Exception as e:
logger.error(f"Error deleting document {document_id}: {e}")
raise TransactionError(f"delete document {document_id}", e, context)
async def get_cache_path(
self,
document_id: str,
context: Optional[ErrorContext] = None
) -> Path:
"""Get the cache file path for a document."""
if context is None:
context = ErrorContext(
operation_id=f"get_cache_path_{document_id}",
operation_type=OperationType.READ,
resource_type="CachePath",
resource_id=document_id
)
try:
conn = self.connection_manager.get_database_connection()
cursor = conn.execute("""
SELECT cache_path FROM ast_cache WHERE document_id = ?
""", (document_id,))
row = cursor.fetchone()
if not row:
raise ResourceNotFoundError("Cache", document_id, context)
return Path(row[0])
except Exception as e:
logger.error(f"Error getting cache path for document {document_id}: {e}")
raise QueryError(
f"SELECT cache_path FROM ast_cache WHERE document_id = '{document_id}'",
{"document_id": document_id},
e,
context
)
class SqliteCacheRepository(CacheRepository):
"""
SQLite implementation of CacheRepository.
Provides efficient caching operations using SQLite as storage backend.
"""
def __init__(self, connection_manager: ConnectionManager):
self.connection_manager = connection_manager
self._initialize_cache_schema()
def _initialize_cache_schema(self):
"""Initialize database schema for cache operations."""
try:
conn = self.connection_manager.get_database_connection()
# Create cache entries table
conn.execute("""
CREATE TABLE IF NOT EXISTS cache_entries (
key TEXT PRIMARY KEY,
value_json TEXT NOT NULL,
ttl_expires_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
accessed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create index for TTL cleanup
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_ttl ON cache_entries(ttl_expires_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_cache_accessed ON cache_entries(accessed_at)")
conn.commit()
logger.info("Cache schema initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize cache schema: {e}")
raise ConnectionError("markitect.db", e)
async def get(
self,
key: str,
context: Optional[ErrorContext] = None
) -> Optional[Any]:
"""Retrieve a value from cache."""
if context is None:
context = ErrorContext(
operation_id=f"cache_get_{key}",
operation_type=OperationType.READ,
resource_type="Cache",
resource_id=key
)
try:
conn = self.connection_manager.get_database_connection()
# Clean up expired entries first
await self._cleanup_expired_entries(conn)
cursor = conn.execute("""
SELECT value_json FROM cache_entries
WHERE key = ? AND (ttl_expires_at IS NULL OR ttl_expires_at > CURRENT_TIMESTAMP)
""", (key,))
row = cursor.fetchone()
if row:
# Update access time
conn.execute("""
UPDATE cache_entries SET accessed_at = CURRENT_TIMESTAMP WHERE key = ?
""", (key,))
conn.commit()
return json.loads(row[0])
return None
except json.JSONDecodeError as e:
logger.error(f"Failed to parse cached value for key {key}: {e}")
# Remove corrupted cache entry
conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
conn.commit()
return None
except Exception as e:
logger.error(f"Error getting cache value for key {key}: {e}")
return None
async def set(
self,
key: str,
value: Any,
ttl: Optional[int] = None,
context: Optional[ErrorContext] = None
) -> bool:
"""Store a value in cache."""
if context is None:
context = ErrorContext(
operation_id=f"cache_set_{key}",
operation_type=OperationType.WRITE,
resource_type="Cache",
resource_id=key,
request_data={"ttl": ttl}
)
try:
conn = self.connection_manager.get_database_connection()
# Calculate expiration time
expires_at = None
if ttl:
from datetime import timedelta
expires_at = (datetime.utcnow() + timedelta(seconds=ttl)).isoformat()
# Serialize value
value_json = json.dumps(value)
# Upsert cache entry
conn.execute("""
INSERT OR REPLACE INTO cache_entries (key, value_json, ttl_expires_at, created_at, accessed_at)
VALUES (?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
""", (key, value_json, expires_at))
conn.commit()
return True
except Exception as e:
logger.error(f"Error setting cache value for key {key}: {e}")
return False
async def delete(
self,
key: str,
context: Optional[ErrorContext] = None
) -> bool:
"""Delete a value from cache."""
if context is None:
context = ErrorContext(
operation_id=f"cache_delete_{key}",
operation_type=OperationType.DELETE,
resource_type="Cache",
resource_id=key
)
try:
conn = self.connection_manager.get_database_connection()
cursor = conn.execute("DELETE FROM cache_entries WHERE key = ?", (key,))
conn.commit()
return cursor.rowcount > 0
except Exception as e:
logger.error(f"Error deleting cache value for key {key}: {e}")
return False
async def invalidate_pattern(
self,
pattern: str,
context: Optional[ErrorContext] = None
) -> int:
"""Invalidate cache entries matching a pattern."""
if context is None:
context = ErrorContext(
operation_id=f"cache_invalidate_{pattern}",
operation_type=OperationType.DELETE,
resource_type="Cache",
metadata={"pattern": pattern}
)
try:
conn = self.connection_manager.get_database_connection()
# Convert pattern to SQL LIKE pattern
sql_pattern = pattern.replace("*", "%")
cursor = conn.execute("DELETE FROM cache_entries WHERE key LIKE ?", (sql_pattern,))
conn.commit()
deleted_count = cursor.rowcount
logger.info(f"Invalidated {deleted_count} cache entries matching pattern '{pattern}'")
return deleted_count
except Exception as e:
logger.error(f"Error invalidating cache pattern {pattern}: {e}")
raise QueryError(f"DELETE FROM cache_entries WHERE key LIKE '{pattern}'", {"pattern": pattern}, e, context)
async def store_ast_cache(
self,
document_id: str,
ast: Dict[str, Any],
context: Optional[ErrorContext] = None
) -> bool:
"""Store AST cache for a document."""
if context is None:
context = ErrorContext(
operation_id=f"store_ast_cache_{document_id}",
operation_type=OperationType.WRITE,
resource_type="ASTCache",
resource_id=document_id
)
try:
conn = self.connection_manager.get_database_connection()
# Generate cache file path
cache_id = str(uuid.uuid4())
cache_path = f".cache/ast/{document_id}/{cache_id}.json"
# Create cache directory
cache_dir = Path(cache_path).parent
cache_dir.mkdir(parents=True, exist_ok=True)
# Write AST to cache file
with open(cache_path, 'w') as f:
json.dump(ast, f, indent=2)
cache_size = Path(cache_path).stat().st_size
# Store cache metadata in database
conn.execute("""
INSERT OR REPLACE INTO ast_cache (id, document_id, cache_path, cache_size, created_at, accessed_at)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
""", (cache_id, document_id, cache_path, cache_size))
conn.commit()
logger.info(f"Stored AST cache for document {document_id} at {cache_path}")
return True
except Exception as e:
logger.error(f"Error storing AST cache for document {document_id}: {e}")
return False
async def _cleanup_expired_entries(self, conn: sqlite3.Connection):
"""Clean up expired cache entries."""
try:
cursor = conn.execute("DELETE FROM cache_entries WHERE ttl_expires_at < CURRENT_TIMESTAMP")
deleted_count = cursor.rowcount
if deleted_count > 0:
logger.debug(f"Cleaned up {deleted_count} expired cache entries")
except Exception as e:
logger.warning(f"Error cleaning up expired cache entries: {e}")