""" Tests for Issue #17: Batch Processing and Recursive Operations This test suite verifies the batch processing functionality including: - Directory processing with recursive support - Glob pattern matching for file selection - Progress tracking and error handling - Depth control for recursive operations """ import pytest import tempfile import shutil from pathlib import Path from unittest.mock import Mock, patch, MagicMock from click.testing import CliRunner from markitect.batch_processor import ( BatchProcessor, ProcessingMode, ErrorHandling, ProcessingResult, BatchResult, ProgressTracker, create_file_processor ) from markitect.cli import cli class TestBatchProcessor: """Test the core BatchProcessor functionality.""" def setup_method(self): """Set up test environment.""" self.temp_dir = tempfile.mkdtemp() self.test_dir = Path(self.temp_dir) def teardown_method(self): """Clean up test environment.""" shutil.rmtree(self.temp_dir) def create_test_files(self, structure): """Create test file structure from dict.""" for path, content in structure.items(): file_path = self.test_dir / path file_path.parent.mkdir(parents=True, exist_ok=True) file_path.write_text(content) def test_find_markdown_files_non_recursive(self): """Test finding markdown files without recursion.""" # Create test structure self.create_test_files({ 'file1.md': '# Test 1', 'file2.md': '# Test 2', 'file3.txt': 'Not markdown', 'subdir/file4.md': '# Test 4' }) processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir, recursive=False) # Should find only files in root directory assert len(files) == 2 file_names = [f.name for f in files] assert 'file1.md' in file_names assert 'file2.md' in file_names assert 'file4.md' not in file_names def test_find_markdown_files_recursive(self): """Test finding markdown files with recursion.""" # Create test structure self.create_test_files({ 'file1.md': '# Test 1', 'subdir/file2.md': '# Test 2', 'subdir/nested/file3.md': '# Test 3', 'subdir/file4.txt': 'Not markdown' }) processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir, recursive=True) # Should find all markdown files assert len(files) == 3 file_names = [f.name for f in files] assert 'file1.md' in file_names assert 'file2.md' in file_names assert 'file3.md' in file_names def test_find_markdown_files_with_depth_limit(self): """Test recursive search with depth limit.""" # Create test structure self.create_test_files({ 'file1.md': '# Test 1', 'level1/file2.md': '# Test 2', 'level1/level2/file3.md': '# Test 3', 'level1/level2/level3/file4.md': '# Test 4' }) processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir, recursive=True, depth=1) # Should find files up to depth 1 assert len(files) == 2 file_names = [f.name for f in files] assert 'file1.md' in file_names assert 'file2.md' in file_names assert 'file3.md' not in file_names assert 'file4.md' not in file_names def test_find_markdown_files_with_pattern(self): """Test finding files with custom pattern.""" # Create test structure self.create_test_files({ 'file1.md': '# Test 1', 'file2.markdown': '# Test 2', 'file3.txt': 'Not markdown' }) processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir, pattern='*.markdown') # Should find only .markdown files assert len(files) == 1 assert files[0].name == 'file2.markdown' def test_find_files_by_glob(self): """Test glob pattern file finding.""" # Create test structure self.create_test_files({ 'docs/file1.md': '# Test 1', 'docs/subdir/file2.md': '# Test 2', 'src/file3.md': '# Test 3', 'file4.txt': 'Not markdown' }) processor = BatchProcessor() # Test recursive glob files = processor.find_files_by_glob(str(self.test_dir / "**/*.md")) assert len(files) == 3 # Test specific directory glob files = processor.find_files_by_glob(str(self.test_dir / "docs/*.md")) assert len(files) == 1 assert files[0].name == 'file1.md' def test_process_files_success(self): """Test successful file processing.""" # Create test files self.create_test_files({ 'file1.md': '# Test 1', 'file2.md': '# Test 2' }) processor = BatchProcessor(show_progress=False) files = list(self.test_dir.glob('*.md')) def mock_processor(file_path): return ProcessingResult( file_path=file_path, success=True, message="Processed successfully" ) result = processor.process_files(files, mock_processor, "Testing") assert result.total_files == 2 assert result.processed == 2 assert result.succeeded == 2 assert result.failed == 0 assert result.skipped == 0 def test_process_files_with_errors(self): """Test file processing with errors.""" # Create test files self.create_test_files({ 'file1.md': '# Test 1', 'file2.md': '# Test 2', 'file3.md': '# Test 3' }) processor = BatchProcessor(show_progress=False, error_handling=ErrorHandling.CONTINUE) files = list(self.test_dir.glob('*.md')) def mock_processor(file_path): # Fail on file2.md if file_path.name == 'file2.md': return ProcessingResult( file_path=file_path, success=False, message="Processing failed", error="Mock error" ) return ProcessingResult( file_path=file_path, success=True, message="Processed successfully" ) result = processor.process_files(files, mock_processor, "Testing") assert result.total_files == 3 assert result.processed == 3 assert result.succeeded == 2 assert result.failed == 1 assert len(result.errors) == 1 def test_process_files_stop_on_error(self): """Test stop-on-error behavior.""" # Create test files self.create_test_files({ 'file1.md': '# Test 1', 'file2.md': '# Test 2', 'file3.md': '# Test 3' }) processor = BatchProcessor(show_progress=False, error_handling=ErrorHandling.STOP) files = sorted(list(self.test_dir.glob('*.md'))) def mock_processor(file_path): # Fail on second file if file_path.name == 'file2.md': return ProcessingResult( file_path=file_path, success=False, message="Processing failed", error="Mock error" ) return ProcessingResult( file_path=file_path, success=True, message="Processed successfully" ) result = processor.process_files(files, mock_processor, "Testing") # Should stop after the error assert result.processed == 2 # file1 success, file2 error assert result.succeeded == 1 assert result.failed == 1 class TestProgressTracker: """Test the ProgressTracker functionality.""" def test_progress_tracking(self): """Test basic progress tracking.""" tracker = ProgressTracker(total=3, show_progress=False) # Test successful processing result1 = ProcessingResult(Path("file1.md"), True, "Success") tracker.update(result1) assert tracker.processed == 1 assert tracker.succeeded == 1 assert tracker.failed == 0 # Test failed processing result2 = ProcessingResult(Path("file2.md"), False, "Failed", "Error message") tracker.update(result2) assert tracker.processed == 2 assert tracker.succeeded == 1 assert tracker.failed == 1 # Test skipped file tracker.skip_file(Path("file3.md"), "Skipped reason") assert tracker.skipped == 1 class TestFileProcessor: """Test the file processor creation and execution.""" def setup_method(self): """Set up test environment.""" self.temp_dir = tempfile.mkdtemp() self.test_dir = Path(self.temp_dir) def teardown_method(self): """Clean up test environment.""" shutil.rmtree(self.temp_dir) @patch('markitect.database.DatabaseManager') def test_ingest_processor(self, mock_db_manager): """Test file processor for ingestion.""" # Create test file test_file = self.test_dir / "test.md" test_file.write_text("# Test content") # Mock database manager mock_db = Mock() mock_db_manager.return_value = mock_db config = {'database': 'test.db'} processor = create_file_processor(config, ProcessingMode.INGEST) result = processor(test_file) assert result.success assert result.file_path == test_file assert "Ingested successfully" in result.message mock_db.store_document.assert_called_once() @patch('markitect.database.DatabaseManager') def test_status_processor(self, mock_db_manager): """Test file processor for status checking.""" # Create test file test_file = self.test_dir / "test.md" test_file.write_text("# Test content") # Mock database manager mock_db = Mock() mock_db.get_metadata.return_value = {'id': 'test123'} mock_db_manager.return_value = mock_db config = {'database': 'test.db'} processor = create_file_processor(config, ProcessingMode.STATUS) result = processor(test_file) assert result.success assert result.file_path == test_file assert "Found in database" in result.message def test_validate_processor(self): """Test file processor for validation.""" # Create test file test_file = self.test_dir / "test.md" test_file.write_text("# Test content") config = {} processor = create_file_processor(config, ProcessingMode.VALIDATE) result = processor(test_file) assert result.success assert result.file_path == test_file assert "Valid markdown" in result.message def test_validate_processor_empty_file(self): """Test validation processor with empty file.""" # Create empty file test_file = self.test_dir / "empty.md" test_file.write_text("") config = {} processor = create_file_processor(config, ProcessingMode.VALIDATE) result = processor(test_file) assert not result.success assert "File is empty" in result.error class TestCLIIntegration: """Test CLI command integration.""" def setup_method(self): """Set up test environment.""" self.temp_dir = tempfile.mkdtemp() self.test_dir = Path(self.temp_dir) self.runner = CliRunner() def teardown_method(self): """Clean up test environment.""" shutil.rmtree(self.temp_dir) def create_test_files(self, structure): """Create test file structure from dict.""" for path, content in structure.items(): file_path = self.test_dir / path file_path.parent.mkdir(parents=True, exist_ok=True) file_path.write_text(content) @patch('markitect.database.DatabaseManager') def test_ingest_dir_command(self, mock_db_manager): """Test ingest-dir CLI command.""" # Create test files self.create_test_files({ 'file1.md': '# Test 1', 'file2.md': '# Test 2', 'subdir/file3.md': '# Test 3' }) # Mock database mock_db = Mock() mock_db_manager.return_value = mock_db result = self.runner.invoke(cli, [ 'ingest-dir', str(self.test_dir), '--quiet' ]) assert result.exit_code == 0 # Should process 2 files (non-recursive by default) assert mock_db.store_document.call_count == 2 @patch('markitect.database.DatabaseManager') def test_ingest_dir_recursive(self, mock_db_manager): """Test ingest-dir with recursive option.""" # Create test files self.create_test_files({ 'file1.md': '# Test 1', 'subdir/file2.md': '# Test 2', 'subdir/nested/file3.md': '# Test 3' }) # Mock database mock_db = Mock() mock_db_manager.return_value = mock_db result = self.runner.invoke(cli, [ 'ingest-dir', str(self.test_dir), '--recursive', '--quiet' ]) assert result.exit_code == 0 # Should process all 3 files assert mock_db.store_document.call_count == 3 @patch('markitect.database.DatabaseManager') def test_batch_process_command(self, mock_db_manager): """Test batch-process CLI command.""" # Create test files self.create_test_files({ 'docs/file1.md': '# Test 1', 'docs/file2.md': '# Test 2', 'src/file3.md': '# Test 3' }) # Mock database mock_db = Mock() mock_db_manager.return_value = mock_db # Test glob pattern pattern = str(self.test_dir / "docs/*.md") result = self.runner.invoke(cli, [ 'batch-process', pattern, '--operation', 'ingest', '--quiet' ]) assert result.exit_code == 0 # Should process 2 files from docs directory assert mock_db.store_document.call_count == 2 @patch('markitect.database.DatabaseManager') def test_recursive_command(self, mock_db_manager): """Test recursive CLI command.""" # Create test files self.create_test_files({ 'level1/file1.md': '# Test 1', 'level1/level2/file2.md': '# Test 2', 'level1/level2/level3/file3.md': '# Test 3' }) # Mock database mock_db = Mock() mock_db.get_metadata.side_effect = Exception("Not found") mock_db_manager.return_value = mock_db result = self.runner.invoke(cli, [ 'recursive', str(self.test_dir), '--depth', '2', '--operation', 'status', '--quiet' ]) assert result.exit_code == 0 # Should check status for files up to depth 2 assert mock_db.get_metadata.call_count == 2 def test_error_handling_stop(self): """Test error handling with stop strategy.""" # Create test directory with no files result = self.runner.invoke(cli, [ 'ingest-dir', str(self.test_dir), '--error-handling', 'stop', '--quiet' ]) # Should exit cleanly when no files found assert result.exit_code == 0 def test_invalid_directory(self): """Test handling of invalid directory.""" result = self.runner.invoke(cli, [ 'ingest-dir', '/nonexistent/directory', '--quiet' ]) # Should exit with error assert result.exit_code == 2 # Click argument validation error @patch('markitect.database.DatabaseManager') def test_custom_pattern(self, mock_db_manager): """Test custom file pattern matching.""" # Create test files with different extensions self.create_test_files({ 'file1.md': '# Test 1', 'file2.markdown': '# Test 2', 'file3.txt': 'Not markdown' }) # Mock database mock_db = Mock() mock_db_manager.return_value = mock_db result = self.runner.invoke(cli, [ 'ingest-dir', str(self.test_dir), '--pattern', '*.markdown', '--quiet' ]) assert result.exit_code == 0 # Should process only .markdown files assert mock_db.store_document.call_count == 1 class TestErrorHandling: """Test error handling scenarios.""" def setup_method(self): """Set up test environment.""" self.temp_dir = tempfile.mkdtemp() self.test_dir = Path(self.temp_dir) def teardown_method(self): """Clean up test environment.""" shutil.rmtree(self.temp_dir) def test_permission_error_handling(self): """Test handling of permission errors.""" processor = BatchProcessor(show_progress=False) # Mock os.listdir to raise PermissionError with patch('pathlib.Path.iterdir') as mock_iterdir: mock_iterdir.side_effect = PermissionError("Permission denied") files = processor.find_markdown_files(self.test_dir) # Should return empty list without crashing assert files == [] def test_nonexistent_directory(self): """Test handling of nonexistent directories.""" processor = BatchProcessor() with pytest.raises(FileNotFoundError): processor.find_markdown_files(Path("/nonexistent/directory")) def test_file_as_directory(self): """Test handling when a file is passed as directory.""" # Create a file test_file = self.test_dir / "test.md" test_file.write_text("# Test") processor = BatchProcessor() with pytest.raises(NotADirectoryError): processor.find_markdown_files(test_file) class TestEdgeCases: """Test edge cases and boundary conditions.""" def setup_method(self): """Set up test environment.""" self.temp_dir = tempfile.mkdtemp() self.test_dir = Path(self.temp_dir) def teardown_method(self): """Clean up test environment.""" shutil.rmtree(self.temp_dir) def test_empty_directory(self): """Test processing empty directory.""" processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir) assert files == [] def test_hidden_directories(self): """Test that hidden directories are skipped.""" # Create hidden directory hidden_dir = self.test_dir / ".hidden" hidden_dir.mkdir() (hidden_dir / "test.md").write_text("# Hidden") processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir, recursive=True) # Should not find files in hidden directories assert len(files) == 0 def test_depth_zero(self): """Test depth=0 behavior.""" # Create nested structure (self.test_dir / "file1.md").write_text("# Test 1") subdir = self.test_dir / "subdir" subdir.mkdir() (subdir / "file2.md").write_text("# Test 2") processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir, recursive=True, depth=0) # Depth 0 should only include files in the starting directory # With our corrected logic, this should only find file1.md assert len(files) == 1 assert files[0].name == "file1.md" def test_very_deep_structure(self): """Test with very deep directory structure.""" # Create 10-level deep structure # Start with a file at the root level (self.test_dir / "file_root.md").write_text("# Root Test") current_dir = self.test_dir for i in range(10): current_dir = current_dir / f"level{i}" current_dir.mkdir() (current_dir / f"file{i}.md").write_text(f"# Test {i}") processor = BatchProcessor() files = processor.find_markdown_files(self.test_dir, recursive=True, depth=5) # Should find files up to depth 5 # Root (depth 0) + levels 0-4 (depths 1-5) = 6 files assert len(files) == 6 def test_glob_with_no_matches(self): """Test glob pattern with no matches.""" processor = BatchProcessor() files = processor.find_files_by_glob(str(self.test_dir / "*.nonexistent")) assert files == [] def test_file_deleted_during_processing(self): """Test handling file deletion during processing.""" # Create test file test_file = self.test_dir / "test.md" test_file.write_text("# Test") def mock_processor(file_path): # This test is actually checking the file existence in the process_files loop # not the processor function itself return ProcessingResult(file_path, True, "Processed") processor = BatchProcessor(show_progress=False) files = [test_file] # Delete the file after creating the file list but before processing test_file.unlink() result = processor.process_files(files, mock_processor, "Testing") # Should handle gracefully - file should be skipped assert result.skipped == 1 assert result.processed == 0