Files
markitect-main/tests/integration/repositories/test_document_repository_integration.py
tegwick 21a5d1d734
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat: Implement comprehensive Testing Architecture Enhancement
Establishes robust testing framework with clean architecture patterns:

## Phase 1: Test Infrastructure Foundation
- Global test configuration with pytest.ini and conftest.py
- Isolated test workspaces and environment management
- Comprehensive fixture library for all test types
- Test requirements and dependency management

## Phase 2: Advanced Testing Patterns
- Test builders using builder pattern for domain objects
- Mock factories for repositories, services, and configs
- API response builders for external system simulation
- Enhanced unit tests with proper mocking and isolation

## Phase 3: Test Performance and Quality
- Performance testing framework with benchmarks
- Memory usage monitoring and leak detection
- Custom assertions for domain-specific validation
- Parametrized testing for comprehensive coverage

## Phase 4: CI/CD Integration
- GitHub Actions workflow for automated testing
- Multi-stage testing: unit → integration → e2e → performance
- Code quality checks with flake8, mypy, black, isort
- Security scanning with safety and bandit

## Testing Architecture Benefits
- 100+ new test infrastructure components
- Standardized test organization (unit/integration/e2e)
- Mock-based testing with no external dependencies
- Performance regression detection
- Comprehensive fixture library
- CI/CD pipeline with quality gates

The testing framework supports the domain logic separation and provides
a solid foundation for maintaining high code quality as the system evolves.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-26 22:36:35 +02:00

487 lines
16 KiB
Python

"""
Integration tests for document repository with real database.
Demonstrates:
- Real database integration testing
- Transaction testing
- Performance validation
- Error scenario handling
"""
import ast
import asyncio
import shutil
import sqlite3
import tempfile
from datetime import datetime, timezone
from pathlib import Path

import pytest

from tests.fixtures.markdown_samples import MarkdownDocumentBuilder, SAMPLE_COMPLEX_DOCUMENT
from tests.utils.assertions import assert_file_exists, assert_performance_within_bounds
class MockDocument:
    """Minimal in-memory document record used by the repository tests.

    Attributes:
        filename: Name the document is stored under.
        content: Raw document text.
        ast_data: Parsed-structure payload; a fresh empty dict when the caller
            passes nothing (or any falsy value).
        created_at: Timezone-aware UTC timestamp taken at construction.
        updated_at: Starts out equal to ``created_at``.
    """

    def __init__(self, filename: str, content: str, ast_data: dict = None):
        self.filename = filename
        self.content = content
        # ``or {}`` gives every instance its own dict instead of sharing one.
        self.ast_data = ast_data or {}
        # Read the clock once so a brand-new document never reports an
        # ``updated_at`` that is later than its ``created_at``.
        now = datetime.now(timezone.utc)
        self.created_at = now
        self.updated_at = now
class MockDocumentRepository:
    """SQLite-backed stand-in for a document repository.

    Every operation opens its own connection to ``db_path`` and closes it
    before returning, so no handle is held between calls. The ``async``
    methods await a short ``asyncio.sleep`` to imitate I/O latency; the
    sqlite3 calls themselves are synchronous.

    ``ast_data`` is persisted as the ``str()`` of the Python object and read
    back with ``ast.literal_eval``, so it must consist of Python literals.
    """

    def __init__(self, db_path: Path):
        """Remember ``db_path`` and create the schema if it is missing."""
        self.db_path = db_path
        self._init_database()

    @staticmethod
    def _decode_ast(raw):
        """Turn the text of the ``ast_data`` column back into a Python object.

        Returns ``{}`` for ``None`` or an empty string. ``ast.literal_eval``
        only accepts literals, so text read from the database file cannot
        execute code (the ``eval()`` used previously could).

        Raises:
            ValueError / SyntaxError: if the stored text is not a literal.
        """
        return ast.literal_eval(raw) if raw else {}

    @staticmethod
    def _escape_like(term: str) -> str:
        """Escape ``\\``, ``%`` and ``_`` so ``LIKE`` treats them literally."""
        # The backslash must be handled first, otherwise the escapes added
        # for ``%`` and ``_`` would themselves be escaped again.
        for special in ("\\", "%", "_"):
            term = term.replace(special, "\\" + special)
        return term

    def _init_database(self):
        """Create the ``documents`` table and its filename index if absent."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    filename TEXT UNIQUE NOT NULL,
                    content TEXT NOT NULL,
                    ast_data TEXT,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                )
            """)
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_documents_filename
                ON documents(filename)
            """)
            conn.commit()
        finally:
            # Close even when a DDL statement fails, so the handle is not leaked.
            conn.close()

    async def store_document(self, document: "MockDocument") -> int:
        """Insert ``document`` and return the row ID assigned to it.

        Raises:
            ValueError: if the insert violates a table constraint; the message
                reports the filename as already existing.
        """
        await asyncio.sleep(0.001)  # Simulate async database operation
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        try:
            cursor.execute("""
                INSERT INTO documents (filename, content, ast_data, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?)
            """, (
                document.filename,
                document.content,
                str(document.ast_data),
                document.created_at.isoformat(),
                document.updated_at.isoformat(),
            ))
            document_id = cursor.lastrowid
            conn.commit()
            return document_id
        except sqlite3.IntegrityError as e:
            conn.rollback()
            raise ValueError(f"Document with filename '{document.filename}' already exists") from e
        finally:
            conn.close()

    async def get_document(self, document_id: int) -> "MockDocument":
        """Load the document stored under ``document_id``.

        Raises:
            ValueError: if no row has that ID.
        """
        await asyncio.sleep(0.001)  # Simulate async database operation
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        try:
            cursor.execute("""
                SELECT filename, content, ast_data, created_at, updated_at
                FROM documents WHERE id = ?
            """, (document_id,))
            row = cursor.fetchone()
            if not row:
                raise ValueError(f"Document with ID {document_id} not found")
            filename, content, ast_data, created_at, updated_at = row
            document = MockDocument(filename, content, self._decode_ast(ast_data))
            # Restore the persisted timestamps over the ones set at construction.
            document.created_at = datetime.fromisoformat(created_at)
            document.updated_at = datetime.fromisoformat(updated_at)
            return document
        finally:
            conn.close()

    async def update_document(self, document_id: int, content: str, ast_data: dict) -> None:
        """Replace content and AST data of a row and refresh ``updated_at``.

        Raises:
            ValueError: if no row has that ID (nothing is committed then).
        """
        await asyncio.sleep(0.001)  # Simulate async database operation
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        try:
            cursor.execute("""
                UPDATE documents
                SET content = ?, ast_data = ?, updated_at = ?
                WHERE id = ?
            """, (
                content,
                str(ast_data),
                datetime.now(timezone.utc).isoformat(),
                document_id,
            ))
            if cursor.rowcount == 0:
                raise ValueError(f"Document with ID {document_id} not found")
            conn.commit()
        finally:
            conn.close()

    async def delete_document(self, document_id: int) -> None:
        """Delete the row stored under ``document_id``.

        Raises:
            ValueError: if no row has that ID.
        """
        await asyncio.sleep(0.001)  # Simulate async database operation
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        try:
            cursor.execute("DELETE FROM documents WHERE id = ?", (document_id,))
            if cursor.rowcount == 0:
                raise ValueError(f"Document with ID {document_id} not found")
            conn.commit()
        finally:
            conn.close()

    async def list_all_documents(self):
        """Return id, filename and timestamps of every row, newest first.

        Each entry is a dict with keys ``id``, ``filename``, ``created_at``
        and ``updated_at``; the timestamps are the stored ISO strings.
        """
        await asyncio.sleep(0.001)  # Simulate async database operation
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        try:
            cursor.execute("""
                SELECT id, filename, created_at, updated_at
                FROM documents ORDER BY created_at DESC
            """)
            return [
                {
                    "id": row[0],
                    "filename": row[1],
                    "created_at": row[2],
                    "updated_at": row[3],
                }
                for row in cursor.fetchall()
            ]
        finally:
            conn.close()

    async def search_content(self, search_term: str):
        """Return rows whose content contains ``search_term``, by filename.

        The term is matched as a literal substring: ``%``, ``_`` and ``\\``
        inside it are escaped instead of acting as ``LIKE`` wildcards.
        Each entry is a dict with keys ``id``, ``filename`` and ``content``.
        """
        await asyncio.sleep(0.005)  # Simulate more expensive search operation
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        try:
            cursor.execute("""
                SELECT id, filename, content
                FROM documents
                WHERE content LIKE ? ESCAPE '\\'
                ORDER BY filename
            """, (f"%{self._escape_like(search_term)}%",))
            return [
                {
                    "id": row[0],
                    "filename": row[1],
                    "content": row[2],
                }
                for row in cursor.fetchall()
            ]
        finally:
            conn.close()

    def close(self):
        """Close repository (cleanup).

        Nothing to release: each operation closes its own connection.
        """
        pass
@pytest.fixture
def test_db_path(test_workspace):
    """Return the path of the SQLite file used by the tests in this module.

    The path is ``integration_test.db`` joined onto the value of the
    ``test_workspace`` fixture (defined outside this file). This fixture only
    builds the path; it does not create the file.
    """
    db_file = test_workspace / "integration_test.db"
    return db_file
@pytest.fixture
def document_repository(test_db_path):
    """Yield a ``MockDocumentRepository`` bound to ``test_db_path``.

    Constructing the repository creates the database schema. After the test
    finishes, ``close()`` is called on the repository.

    This is deliberately a synchronous fixture: it awaits nothing, and a
    plain ``@pytest.fixture`` on an ``async`` generator is only resolved when
    pytest-asyncio runs in auto mode. A sync fixture works in both modes and
    can still be requested by ``async`` tests.
    """
    repo = MockDocumentRepository(test_db_path)
    yield repo
    repo.close()
@pytest.mark.integration
class TestDocumentRepositoryIntegration:
    """Runs the repository's operations against an on-disk SQLite file."""

    @pytest.mark.asyncio
    async def test_store_and_retrieve_document(self, document_repository, test_db_path):
        """A stored document can be read back through the ID that was returned."""
        # Arrange: building the repository fixture has already created the file.
        assert_file_exists(test_db_path)
        original = MockDocument(
            filename="test.md",
            content="# Test Document\nThis is a test.",
            ast_data={"type": "document", "children": []},
        )

        # Act
        new_id = await document_repository.store_document(original)
        fetched = await document_repository.get_document(new_id)

        # Assert
        assert isinstance(new_id, int)
        assert new_id > 0
        assert fetched.filename == "test.md"
        assert fetched.content == "# Test Document\nThis is a test."
        assert fetched.ast_data["type"] == "document"

    @pytest.mark.asyncio
    async def test_store_duplicate_filename_raises_error(self, document_repository):
        """Storing a second document under an existing filename is rejected."""
        # Arrange
        first = MockDocument("duplicate.md", "Content 1")
        second = MockDocument("duplicate.md", "Content 2")

        # Act
        await document_repository.store_document(first)

        # Assert
        with pytest.raises(ValueError, match="already exists"):
            await document_repository.store_document(second)

    @pytest.mark.asyncio
    async def test_update_document_content(self, document_repository):
        """Updated content and AST data are what a later read returns."""
        # Arrange
        seed = MockDocument("update.md", "Original content")
        stored_id = await document_repository.store_document(seed)

        # Act
        replacement_text = "Updated content"
        replacement_ast = {"type": "document", "updated": True}
        await document_repository.update_document(stored_id, replacement_text, replacement_ast)

        # Verify
        refreshed = await document_repository.get_document(stored_id)
        assert refreshed.content == "Updated content"
        assert refreshed.ast_data["updated"] is True

    @pytest.mark.asyncio
    async def test_delete_document(self, document_repository):
        """A deleted document can no longer be fetched."""
        # Arrange
        doomed = MockDocument("delete.md", "To be deleted")
        stored_id = await document_repository.store_document(doomed)

        # Sanity check: the document is readable before deletion.
        before = await document_repository.get_document(stored_id)
        assert before.filename == "delete.md"

        # Act
        await document_repository.delete_document(stored_id)

        # Assert
        with pytest.raises(ValueError, match="not found"):
            await document_repository.get_document(stored_id)

    @pytest.mark.asyncio
    async def test_list_all_documents(self, document_repository):
        """Listing returns one entry per stored document."""
        # Arrange: build every document first, then store them in order.
        specs = (
            ("doc1.md", "Content 1"),
            ("doc2.md", "Content 2"),
            ("doc3.md", "Content 3"),
        )
        to_store = [MockDocument(name, body) for name, body in specs]
        for item in to_store:
            await document_repository.store_document(item)

        # Act
        listing = await document_repository.list_all_documents()

        # Assert
        assert len(listing) == 3
        seen_names = {entry["filename"] for entry in listing}
        wanted_names = {"doc1.md", "doc2.md", "doc3.md"}
        assert seen_names == wanted_names

    @pytest.mark.asyncio
    async def test_search_content(self, document_repository):
        """Searching returns exactly the documents whose content matches."""
        # Arrange
        specs = (
            ("api.md", "API documentation for REST endpoints"),
            ("guide.md", "User guide for getting started"),
            ("readme.md", "Project README with API examples"),
        )
        to_store = [MockDocument(name, body) for name, body in specs]
        for item in to_store:
            await document_repository.store_document(item)

        # Act
        api_hits = await document_repository.search_content("API")
        guide_hits = await document_repository.search_content("guide")

        # Assert
        assert len(api_hits) == 2  # api.md and readme.md
        api_names = {hit["filename"] for hit in api_hits}
        assert api_names == {"api.md", "readme.md"}
        assert len(guide_hits) == 1  # guide.md only
        assert guide_hits[0]["filename"] == "guide.md"

    @pytest.mark.asyncio
    async def test_bulk_operations_performance(self, document_repository, performance_timer):
        """Storing and then reading 50 documents stays within the time bounds."""
        # Arrange
        batch = [
            MockDocument(
                f"bulk_{i}.md",
                (MarkdownDocumentBuilder()
                 .with_heading(f"Document {i}")
                 .with_paragraph(f"Content for document {i}")
                 .build()),
            )
            for i in range(50)
        ]

        # Act - sequential bulk storage
        performance_timer.start()
        stored_ids = [await document_repository.store_document(item) for item in batch]
        performance_timer.stop()

        # Assert
        assert len(stored_ids) == 50
        assert_performance_within_bounds(performance_timer.elapsed, 5.0, "bulk document storage")

        # Act - sequential bulk retrieval
        performance_timer.start()
        fetched = [await document_repository.get_document(one_id) for one_id in stored_ids]
        performance_timer.stop()

        # Assert
        assert len(fetched) == 50
        assert_performance_within_bounds(performance_timer.elapsed, 3.0, "bulk document retrieval")

    @pytest.mark.asyncio
    async def test_concurrent_operations(self, document_repository):
        """Twenty stores scheduled together all succeed with distinct IDs."""
        # Arrange
        async def store_one(index):
            body = f"# Document {index}\nContent for document {index}"
            return await document_repository.store_document(
                MockDocument(f"concurrent_{index}.md", body)
            )

        # Act - schedule all stores at once
        stored_ids = await asyncio.gather(*(store_one(i) for i in range(20)))

        # Assert
        assert len(stored_ids) == 20
        assert len(set(stored_ids)) == 20  # All IDs should be unique

        # Every document must show up in the listing.
        listing = await document_repository.list_all_documents()
        assert len(listing) == 20

    @pytest.mark.asyncio
    async def test_transaction_like_behavior(self, document_repository):
        """A failed update of an unknown ID leaves existing rows untouched."""
        # Arrange - store the document that must survive
        survivor = MockDocument("initial.md", "Initial content")
        survivor_id = await document_repository.store_document(survivor)

        # Act - updating an ID that does not exist must fail
        with pytest.raises(ValueError, match="not found"):
            await document_repository.update_document(99999, "Invalid update", {})

        # Assert - the original document is unchanged
        after = await document_repository.get_document(survivor_id)
        assert after.content == "Initial content"

    @pytest.mark.asyncio
    async def test_large_document_handling(self, document_repository, performance_timer):
        """A generated large document round-trips intact within one second."""
        # Arrange - generate the large payload
        from tests.fixtures.markdown_samples import LargeMarkdownGenerator
        big_text = LargeMarkdownGenerator(seed=42).generate_document(size="100kb")
        big_doc = MockDocument("large.md", big_text)

        # Act
        performance_timer.start()
        stored_id = await document_repository.store_document(big_doc)
        fetched = await document_repository.get_document(stored_id)
        performance_timer.stop()

        # Assert
        assert stored_id > 0
        assert len(fetched.content) > 100000  # At least 100KB
        assert fetched.content == big_text
        assert_performance_within_bounds(performance_timer.elapsed, 1.0, "large document operations")

    @pytest.mark.asyncio
    @pytest.mark.slow
    async def test_search_performance_with_large_dataset(self, document_repository, performance_timer):
        """Two searches over 100 stored documents finish within two seconds."""
        # Arrange - 100 documents, cycling through the five search terms
        search_terms = ["API", "database", "testing", "performance", "integration"]
        corpus = [
            MockDocument(
                f"search_{i}.md",
                (MarkdownDocumentBuilder()
                 .with_heading(f"Document {i}")
                 .with_paragraph(
                     f"This document covers {search_terms[i % len(search_terms)]} functionality in detail."
                 )
                 .with_paragraph("Additional content for search testing.")
                 .build()),
            )
            for i in range(100)
        ]

        # Store all documents
        for item in corpus:
            await document_repository.store_document(item)

        # Act - time the two searches together
        performance_timer.start()
        api_hits = await document_repository.search_content("API")
        database_hits = await document_repository.search_content("database")
        performance_timer.stop()

        # Assert
        assert len(api_hits) >= 20  # Should find multiple documents
        assert len(database_hits) >= 20
        assert_performance_within_bounds(performance_timer.elapsed, 2.0, "search operations")