Files
markitect-main/tests/test_issue_2_file_ingestion.py
tegwick 72f341279a feat: Implement comprehensive IssueCreator system and create CLI roadmap issues
IssueCreator Implementation:
- Add tddai/issue_creator.py with full POST API functionality for issue creation
- Support multiple creation methods: basic, enhancement, bug, template-based
- Include structured issue formatting with acceptance criteria and dependencies
- Template system with variable substitution for reusable issue creation

Authentication Fix:
- Fix critical authentication bug: use GITEA_API_TOKEN instead of GITEA_TOKEN
- Update both IssueCreator and IssueWriter for consistency
- Update all tests and documentation to reflect correct environment variable

Comprehensive Test Suite:
- Add 15 unit tests for IssueCreator (tests/test_issue_creator.py)
- Add 5 integration tests for full API lifecycle (tests/test_issue_integration.py)
- Create test_environment_variable_detection to prevent future auth issues
- Total 33 tests covering complete issue handling workflow

CLI Integration:
- Enhance tddai_cli.py with 3 new commands: create-issue, create-enhancement, create-from-template
- Add comprehensive argument parsing with optional fields and priority support
- Include user-friendly output with next step guidance
- Update package exports to include IssueCreator

CLI Roadmap Execution:
- Successfully create 8 CLI implementation issues (#12-#19) in Gitea
- Resolve mismatch between NEXT.md roadmap and actual Gitea issues
- Issues prioritized for core USPs: Database Query CLI and AST Query CLI
- Remove local MISSING_ISSUES.md file after successful creation

Framework Maturity:
- Complete CRUD operations for issue management (Create, Read, Update, Delete)
- Robust error handling and API integration patterns
- Full authentication and environment variable management
- Ready for production CLI implementation workflow

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 23:36:07 +02:00

316 lines
10 KiB
Python

"""
Test suite for Issue #2: Fast Document Loading & CLI Manipulation
Focus: Subtask 2a - File Ingestion & AST Caching
This test suite covers the core file ingestion and AST caching functionality
that forms the foundation of the performance-optimized document system.
"""
import json
import os
import tempfile
import time
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
from markitect.database import DatabaseManager
from markitect.parser import parse_markdown_to_ast
class TestFileIngestion:
    """Exercise markdown file ingestion and the token list the parser returns."""

    def setup_method(self):
        """Build a scratch directory holding an initialized database and a sample file."""
        self.temp_dir = tempfile.mkdtemp()
        scratch = Path(self.temp_dir)

        self.db_path = scratch / "test.db"
        self.db_manager = DatabaseManager(self.db_path)
        self.db_manager.initialize_database()

        # Sample document: YAML front matter, two heading levels, inline
        # emphasis and a bullet list. The literal is kept flush-left so the
        # front matter fence is the very first line of the file.
        self.test_md_content = """---
title: Test Document
author: Test User
date: "2025-09-24"
---
# Test Document
This is a test document with **bold** and *italic* text.
## Section 1
- Item 1
- Item 2
- Item 3
## Section 2
Some more content here.
"""
        self.test_file = scratch / "test.md"
        self.test_file.write_text(self.test_md_content)

    def teardown_method(self):
        """Remove the scratch directory and everything created inside it."""
        from shutil import rmtree

        rmtree(self.temp_dir)

    def test_parse_markdown_file_to_ast(self):
        """Ingesting a file returns a mapping with an AST and file metadata."""
        # Imported lazily so a missing module fails this test only, not collection.
        from markitect.document_manager import DocumentManager

        ingested = DocumentManager(self.db_manager).ingest_file(self.test_file)

        assert ingested is not None
        assert "ast" in ingested
        assert "metadata" in ingested

        metadata = ingested["metadata"]
        assert metadata["filename"] == "test.md"
        assert metadata["title"] == "Test Document"

    def test_ast_contains_expected_structure(self):
        """The parser yields a non-empty token list with the expected token types."""
        tokens = parse_markdown_to_ast(self.test_md_content)

        assert isinstance(tokens, list)
        assert tokens

        # The sample document has headings, paragraphs and a bullet list.
        seen_types = {tok["type"] for tok in tokens}
        for expected in ("heading_open", "paragraph_open", "bullet_list_open"):
            assert expected in seen_types
class TestASTCaching:
    """Test the AST caching system used for performance optimization.

    Each test gets a fresh scratch directory containing an empty cache
    directory and a generated markdown file with 50 sections.
    """

    def setup_method(self):
        """Set up the scratch directory, cache directory and a larger test file."""
        self.temp_dir = tempfile.mkdtemp()
        self.db_path = Path(self.temp_dir) / "test.db"
        self.cache_dir = Path(self.temp_dir) / "ast_cache"
        self.cache_dir.mkdir()
        self.test_file = Path(self.temp_dir) / "performance_test.md"

        # A larger document so that parse time is measurable. The literal is
        # kept flush-left so the front matter fence is the first line.
        large_content = """---
title: Large Test Document
---
# Large Document
""" + "\n\n".join(
            f"## Section {i}\n\nContent for section {i} with multiple paragraphs."
            for i in range(50)
        )
        self.test_file.write_text(large_content)

    def teardown_method(self):
        """Clean up test files."""
        import shutil

        shutil.rmtree(self.temp_dir)

    def test_create_ast_cache_file(self):
        """Caching a markdown file writes a JSON file holding a non-empty list."""
        from markitect.ast_cache import ASTCache

        cache = ASTCache(self.cache_dir)
        cache_info = cache.cache_file(self.test_file)

        # Verify the cache file was created with the expected extension.
        cache_path = cache_info['cache_file']
        assert cache_path.exists()
        assert cache_path.suffix == '.json'

        # Verify the cache contains a valid AST. JSON is read as UTF-8
        # explicitly so the result does not depend on the platform locale.
        with open(cache_path, encoding="utf-8") as f:
            cached_ast = json.load(f)
        assert isinstance(cached_ast, list)
        assert len(cached_ast) > 0

    def test_cache_faster_than_parsing(self):
        """Loading from cache takes less than 50% of the initial parse-and-cache time."""
        from markitect.ast_cache import ASTCache

        cache = ASTCache(self.cache_dir)

        # perf_counter() is monotonic and high-resolution. time.time() can
        # tick coarsely enough that both intervals read 0.0, which would make
        # the strict comparison below fail spuriously.
        start = time.perf_counter()
        cache.cache_file(self.test_file)
        initial_parse_time = time.perf_counter() - start

        start = time.perf_counter()
        cached_ast = cache.load_cached_ast(self.test_file)
        cache_load_time = time.perf_counter() - start

        # Check the payload first so a missing cache is reported as such
        # rather than as a timing failure.
        assert cached_ast is not None
        # Less than 50% as per requirements.
        assert cache_load_time < (initial_parse_time * 0.5)

    def test_cache_invalidation_on_file_change(self):
        """Re-caching after the source file changes rewrites the cache file."""
        from markitect.ast_cache import ASTCache

        cache = ASTCache(self.cache_dir)
        original_cache = cache.cache_file(self.test_file)
        # Integer nanoseconds avoid the rounding of the float st_mtime.
        original_mtime = original_cache['cache_file'].stat().st_mtime_ns

        # Modify the source file after a short pause so timestamps differ.
        time.sleep(0.1)
        modified_content = self.test_file.read_text() + "\n\n## New Section\n\nAdded content."
        self.test_file.write_text(modified_content)

        # The cache should detect the change and regenerate.
        new_cache = cache.cache_file(self.test_file)
        new_mtime = new_cache['cache_file'].stat().st_mtime_ns
        assert new_mtime > original_mtime
class TestDatabaseIntegration:
    """Test integration with the existing database system from Issue #1."""

    def setup_method(self):
        """Set up an initialized test database and a small markdown file."""
        self.temp_dir = tempfile.mkdtemp()
        self.db_path = Path(self.temp_dir) / "test.db"
        self.db_manager = DatabaseManager(self.db_path)
        self.db_manager.initialize_database()

        self.test_file = Path(self.temp_dir) / "integration_test.md"
        # Kept flush-left so the front matter fence is the first line.
        self.test_content = """---
title: Integration Test
category: testing
---
# Integration Test
Testing database integration.
"""
        self.test_file.write_text(self.test_content)

    def teardown_method(self):
        """Clean up test files."""
        import shutil

        shutil.rmtree(self.temp_dir)

    def test_store_document_metadata_in_database(self):
        """Ingesting a file stores its name and front matter in the database."""
        from markitect.document_manager import DocumentManager

        doc_manager = DocumentManager(self.db_manager)
        # The return value is not needed here; only the database side effect is checked.
        doc_manager.ingest_file(self.test_file)

        stored_files = self.db_manager.list_markdown_files()
        assert len(stored_files) == 1

        stored_file = stored_files[0]
        assert stored_file['filename'] == 'integration_test.md'
        assert stored_file['front_matter']['title'] == 'Integration Test'
        assert stored_file['front_matter']['category'] == 'testing'

    def test_store_ast_cache_reference_in_database(self):
        """Ingesting a file reports an existing AST cache path and stores a record."""
        from markitect.document_manager import DocumentManager

        doc_manager = DocumentManager(self.db_manager)
        result = doc_manager.ingest_file(self.test_file)

        # For now, the cache reference is tracked in the result object.
        assert 'ast_cache_path' in result
        assert result['ast_cache_path'].exists()

        # The database must at least hold a record for the ingested file.
        # This makes the former bare `stored_files[0]` lookup an explicit check.
        stored_files = self.db_manager.list_markdown_files()
        assert stored_files, "ingested file was not recorded in the database"

    def test_performance_metadata_tracking(self):
        """Ingesting a file reports parse and cache timings in the result."""
        from markitect.document_manager import DocumentManager

        doc_manager = DocumentManager(self.db_manager)
        result = doc_manager.ingest_file(self.test_file)

        assert 'parse_time' in result
        assert 'cache_time' in result
        assert result['parse_time'] > 0
        assert result['cache_time'] >= 0
class TestErrorHandling:
    """Test error handling for file ingestion and caching."""

    def setup_method(self):
        """Set up a scratch directory and an initialized database.

        The database is initialized here to match the other test classes in
        this module, which call ``initialize_database()`` right after
        constructing the manager.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.db_path = Path(self.temp_dir) / "test.db"
        self.db_manager = DatabaseManager(self.db_path)
        self.db_manager.initialize_database()

    def teardown_method(self):
        """Clean up test files."""
        import shutil

        shutil.rmtree(self.temp_dir)

    def test_handle_nonexistent_file(self):
        """Ingesting a path that does not exist raises FileNotFoundError."""
        from markitect.document_manager import DocumentManager

        doc_manager = DocumentManager(self.db_manager)
        nonexistent_file = Path(self.temp_dir) / "nonexistent.md"

        with pytest.raises(FileNotFoundError):
            doc_manager.ingest_file(nonexistent_file)

    def test_handle_invalid_markdown(self):
        """Ingesting a file with malformed front matter still returns a result."""
        from markitect.document_manager import DocumentManager

        # Front matter with an unclosed YAML flow sequence. Kept flush-left
        # so the fence is the first line of the file.
        invalid_file = Path(self.temp_dir) / "invalid.md"
        invalid_content = """---
title: Test
invalid_yaml: [unclosed bracket
---
# Content
"""
        invalid_file.write_text(invalid_content)

        doc_manager = DocumentManager(self.db_manager)

        # Front matter parsing should fail gracefully, not crash.
        result = doc_manager.ingest_file(invalid_file)
        assert result is not None

    def test_handle_cache_directory_permissions(self):
        """Caching into a read-only directory raises PermissionError."""
        # Directory mode bits are not enforced on Windows or for root, so
        # the expected PermissionError cannot occur there.
        if os.name == "nt":
            pytest.skip("directory permission bits are not enforced on Windows")
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            pytest.skip("root bypasses directory permission checks")

        from markitect.ast_cache import ASTCache

        test_file = Path(self.temp_dir) / "test.md"
        test_file.write_text("# Test")

        # Create a read-only directory to simulate permission issues.
        readonly_dir = Path(self.temp_dir) / "readonly"
        readonly_dir.mkdir()
        readonly_dir.chmod(0o444)
        try:
            cache = ASTCache(readonly_dir)
            with pytest.raises(PermissionError):
                cache.cache_file(test_file)
        finally:
            # Restore access so teardown can always remove the directory.
            readonly_dir.chmod(0o755)