class ProgressTracker:
    """Count the outcomes of a batch run and optionally echo per-file progress.

    The counters are plain attributes that callers read after the run:
    ``processed`` (results passed to ``update``), ``succeeded``, ``failed``
    and ``skipped`` (files reported through ``skip_file``). Skipped files are
    not added to ``processed``.
    """

    def __init__(self, total: int, show_progress: bool = True):
        """Start with all counters at zero.

        Args:
            total: Number of files the batch intends to handle; it is the
                denominator of the displayed percentage.
            show_progress: When True, each update/skip is echoed via click.
        """
        self.total = total
        self.processed = 0
        self.succeeded = 0
        self.failed = 0
        self.skipped = 0
        self.show_progress = show_progress

    @property
    def percentage(self) -> float:
        """Return the processed share of ``total`` in percent.

        Returns 0.0 when ``total`` is not positive, so a tracker created for
        an empty batch never divides by zero.
        """
        if self.total <= 0:
            return 0.0
        return (self.processed / self.total) * 100

    def update(self, result: "ProcessingResult"):
        """Record one processing result and display it when progress is on.

        Args:
            result: Outcome of a single file; only ``success``, ``error`` and
                ``file_path`` are read here.
        """
        self.processed += 1
        if result.success:
            self.succeeded += 1
        else:
            self.failed += 1

        if self.show_progress:
            self._display_progress(result)

    def skip_file(self, file_path: Path, reason: str):
        """Count a file as skipped and, when progress is on, say why."""
        self.skipped += 1
        if self.show_progress:
            click.echo(f"⚠️ Skipped {file_path}: {reason}")

    def _display_progress(self, result: "ProcessingResult"):
        """Echo a one-line status for ``result`` plus its error, if any."""
        status = "✅" if result.success else "❌"

        # ``percentage`` guards against total == 0; the inline division that
        # used to live here raised ZeroDivisionError in that case.
        click.echo(
            f"{status} [{self.processed}/{self.total}] "
            f"({self.percentage:.1f}%) {result.file_path}"
        )

        if not result.success and result.error:
            click.echo(f" Error: {result.error}")
+ + Args: + directory: Directory to search + pattern: Glob pattern for file matching + recursive: Whether to search recursively + depth: Maximum depth for recursive search + + Returns: + List of matching file paths + """ + files = [] + + if not directory.exists(): + raise FileNotFoundError(f"Directory not found: {directory}") + + if not directory.is_dir(): + raise NotADirectoryError(f"Path is not a directory: {directory}") + + if recursive: + effective_depth = depth if depth is not None else self.max_depth + files.extend(self._find_recursive(directory, pattern, effective_depth)) + else: + # Non-recursive: only current directory + files.extend(self._find_in_directory(directory, pattern)) + + return sorted(files) + + def _find_recursive(self, directory: Path, pattern: str, max_depth: Optional[int]) -> List[Path]: + """Find files recursively with depth control.""" + files = [] + + def _search(current_dir: Path, current_depth: int): + # Add files from current directory (if within depth limit) + if max_depth is None or current_depth <= max_depth: + files.extend(self._find_in_directory(current_dir, pattern)) + + # Recurse into subdirectories (if we haven't reached depth limit) + if max_depth is None or current_depth < max_depth: + try: + for item in current_dir.iterdir(): + if item.is_dir() and not item.name.startswith('.'): + _search(item, current_depth + 1) + except PermissionError: + # Skip directories we can't access + if self.show_progress: + click.echo(f"⚠️ Permission denied: {current_dir}") + + _search(directory, 0) + return files + + def _find_in_directory(self, directory: Path, pattern: str) -> List[Path]: + """Find files matching pattern in a specific directory.""" + files = [] + + try: + for item in directory.iterdir(): + if item.is_file() and fnmatch.fnmatch(item.name, pattern): + files.append(item) + except PermissionError: + if self.show_progress: + click.echo(f"⚠️ Permission denied: {directory}") + + return files + + def find_files_by_glob(self, 
def process_files(self,
                  files: List[Path],
                  processor_func: Callable[[Path], ProcessingResult],
                  operation_name: str = "Processing") -> BatchResult:
    """
    Process a list of files with progress tracking and error handling.

    The configured ``self.error_handling`` strategy decides what happens
    after a failed file:

    - ``STOP``: the failure is recorded and the loop ends immediately.
    - ``CONTINUE``: the failure is recorded and processing goes on.
    - ``SKIP``: processing goes on and the failure is *not* added to
      ``BatchResult.errors`` (it is still counted in ``failed``).

    Args:
        files: List of files to process
        processor_func: Function to process each file; an exception raised
            by it is converted into a failed ProcessingResult
        operation_name: Name of the operation for progress display

    Returns:
        BatchResult with processing statistics
    """
    import time
    start_time = time.time()

    if self.show_progress:
        click.echo(f"🚀 {operation_name} {len(files)} files...")

    tracker = ProgressTracker(len(files), self.show_progress)
    errors = []
    collect_errors = self.error_handling != ErrorHandling.SKIP
    stop_on_error = self.error_handling == ErrorHandling.STOP

    for file_path in files:
        try:
            # Check if file still exists (might have been deleted during processing)
            if not file_path.exists():
                tracker.skip_file(file_path, "File no longer exists")
                continue

            outcome = processor_func(file_path)
        except Exception as e:
            # Unexpected errors become an ordinary failed result so that
            # they flow through the same bookkeeping as reported failures.
            outcome = ProcessingResult(
                file_path=file_path,
                success=False,
                message=f"Unexpected error: {str(e)}",
                error=str(e)
            )

        # Recorded exactly once per file, outside the try block, so an
        # exception while displaying progress cannot count a file twice.
        tracker.update(outcome)

        if outcome.success:
            continue
        if collect_errors:
            errors.append(outcome)
        if stop_on_error:
            break

    processing_time = time.time() - start_time

    batch_result = BatchResult(
        total_files=len(files),
        processed=tracker.processed,
        succeeded=tracker.succeeded,
        failed=tracker.failed,
        skipped=tracker.skipped,
        errors=errors,
        processing_time=processing_time
    )

    if self.show_progress:
        self._display_summary(batch_result, operation_name)

    return batch_result
def process_file(file_path: Path) -> ProcessingResult:
    """Process a single file based on the operation type.

    Reads ``operation`` and ``config`` from the enclosing
    ``create_file_processor`` call. Any exception raised while handling the
    file is turned into a failed ProcessingResult; nothing propagates to the
    caller except interpreter-level signals such as KeyboardInterrupt.

    Args:
        file_path: File to ingest, look up or validate.

    Returns:
        ProcessingResult carrying the success flag, a human-readable message
        and the elapsed time in seconds.
    """
    start_time = time.time()

    try:
        if operation == ProcessingMode.INGEST:
            # Ingest file into database
            from .database import DatabaseManager
            db_manager = DatabaseManager(config.get('database'))

            content = file_path.read_text(encoding='utf-8')
            db_manager.store_document(str(file_path), content)
            message = "Ingested successfully"

        elif operation == ProcessingMode.STATUS:
            # Check file status
            from .database import DatabaseManager
            db_manager = DatabaseManager(config.get('database'))

            try:
                metadata = db_manager.get_metadata(str(file_path))
                message = f"Found in database (ID: {metadata.get('id', 'Unknown')})"
            except Exception:
                # Any lookup failure is reported as "not found". This was a
                # bare ``except:``, which also swallowed KeyboardInterrupt
                # and SystemExit, so Ctrl-C could not stop a status run.
                message = "Not found in database"

        elif operation == ProcessingMode.VALIDATE:
            # Validate file format/content
            content = file_path.read_text(encoding='utf-8')

            # Basic validation - only whitespace-only/empty files are rejected
            if not content.strip():
                raise ValueError("File is empty")
            message = "Valid markdown file"

        else:
            raise ValueError(f"Unsupported operation: {operation}")

    except Exception as e:
        return ProcessingResult(
            file_path=file_path,
            success=False,
            message=f"Failed: {str(e)}",
            error=str(e),
            processing_time=time.time() - start_time
        )

    return ProcessingResult(
        file_path=file_path,
        success=True,
        message=message,
        processing_time=time.time() - start_time
    )
+ + Examples: + markitect ingest-dir ./docs + markitect ingest-dir ./content --recursive --depth 3 + markitect ingest-dir ./articles --pattern "*.markdown" --error-handling stop + """ + try: + # Convert error handling string to enum + error_strategy = ErrorHandling[error_handling.upper()] + + # Initialize batch processor + processor = BatchProcessor( + error_handling=error_strategy, + show_progress=not quiet, + max_depth=depth + ) + + # Find files to process + if not quiet: + click.echo(f"🔍 Searching for files in {directory}...") + + files = processor.find_markdown_files( + directory=directory, + pattern=pattern, + recursive=recursive, + depth=depth + ) + + if not files: + click.echo(f"📭 No files found matching pattern '{pattern}' in {directory}") + return + + # Create file processor for ingestion + file_processor = create_file_processor(config, ProcessingMode.INGEST) + + # Process files + result = processor.process_files(files, file_processor, "Ingesting") + + # Exit with error code if there were failures + if result.failed > 0 and error_strategy == ErrorHandling.STOP: + sys.exit(1) + + except Exception as e: + click.echo(f"Directory ingestion failed: {e}", err=True) + if config.get('verbose'): + import traceback + click.echo(traceback.format_exc(), err=True) + sys.exit(1) + + +@cli.command(name='batch-process') +@click.argument('pattern', type=str) +@click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']), + default='ingest', help='Operation to perform on matched files') +@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']), + default='continue', help='Error handling strategy') +@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output') +@pass_config +def batch_process(config, pattern, operation, error_handling, quiet): + """Process files matching glob pattern. + + Uses glob patterns to find and process files. Supports various operations + including ingestion, status checking, and validation. 
@cli.command(name='recursive')
@click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path))
@click.option('--depth', type=int, default=None, help='Maximum recursion depth')
@click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']),
              default='status', help='Operation to perform')
@click.option('--pattern', default='*.md', help='File pattern to match (default: *.md)')
@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']),
              default='continue', help='Error handling strategy')
@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output')
@pass_config
def recursive(config, directory, depth, operation, pattern, error_handling, quiet):
    """Recursive processing with depth control.

    Performs recursive operations on directory trees with configurable depth limits.
    This command provides fine-grained control over recursive processing behavior.

    Examples:
        markitect recursive ./docs --depth 2 --operation ingest
        markitect recursive ./content --depth 5 --operation status --pattern "*.markdown"
        markitect recursive ./src --operation validate --error-handling stop
    """
    # NOTE: the docstring above is the command's --help text; keep it user-facing.
    try:
        # Convert strings to enums
        error_strategy = ErrorHandling[error_handling.upper()]
        processing_mode = ProcessingMode[operation.upper()]

        # Initialize batch processor with depth control
        processor = BatchProcessor(
            error_handling=error_strategy,
            show_progress=not quiet,
            max_depth=depth
        )

        # Find files recursively
        if not quiet:
            # ``--depth 0`` is a valid limit, so test against None rather
            # than truthiness; otherwise the banner would omit it.
            depth_str = f" (max depth: {depth})" if depth is not None else ""
            click.echo(f"🔍 Recursively searching {directory}{depth_str}...")

        files = processor.find_markdown_files(
            directory=directory,
            pattern=pattern,
            recursive=True,
            depth=depth
        )

        if not files:
            click.echo(f"📭 No files found matching pattern '{pattern}' in {directory}")
            return

        # Create file processor for the specified operation
        file_processor = create_file_processor(config, processing_mode)

        # Appending "ing" to the raw choice produced "validateing" and
        # "statusing"; map each choice to a proper progress label instead.
        progress_labels = {
            'ingest': 'ingesting',
            'status': 'checking',
            'validate': 'validating',
        }
        operation_name = f"Recursively {progress_labels.get(operation, f'{operation}ing')}"
        result = processor.process_files(files, file_processor, operation_name)

        # A non-zero exit is only signalled for the "stop" strategy.
        if result.failed > 0 and error_strategy == ErrorHandling.STOP:
            sys.exit(1)

    except Exception as e:
        click.echo(f"Recursive processing failed: {e}", err=True)
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
b/tests/test_issue_17_batch_processing.py new file mode 100644 index 00000000..bd5b15ce --- /dev/null +++ b/tests/test_issue_17_batch_processing.py @@ -0,0 +1,653 @@ +""" +Tests for Issue #17: Batch Processing and Recursive Operations + +This test suite verifies the batch processing functionality including: +- Directory processing with recursive support +- Glob pattern matching for file selection +- Progress tracking and error handling +- Depth control for recursive operations +""" + +import pytest +import tempfile +import shutil +from pathlib import Path +from unittest.mock import Mock, patch, MagicMock +from click.testing import CliRunner + +from markitect.batch_processor import ( + BatchProcessor, ProcessingMode, ErrorHandling, + ProcessingResult, BatchResult, ProgressTracker, + create_file_processor +) +from markitect.cli import cli + + +class TestBatchProcessor: + """Test the core BatchProcessor functionality.""" + + def setup_method(self): + """Set up test environment.""" + self.temp_dir = tempfile.mkdtemp() + self.test_dir = Path(self.temp_dir) + + def teardown_method(self): + """Clean up test environment.""" + shutil.rmtree(self.temp_dir) + + def create_test_files(self, structure): + """Create test file structure from dict.""" + for path, content in structure.items(): + file_path = self.test_dir / path + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content) + + def test_find_markdown_files_non_recursive(self): + """Test finding markdown files without recursion.""" + # Create test structure + self.create_test_files({ + 'file1.md': '# Test 1', + 'file2.md': '# Test 2', + 'file3.txt': 'Not markdown', + 'subdir/file4.md': '# Test 4' + }) + + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir, recursive=False) + + # Should find only files in root directory + assert len(files) == 2 + file_names = [f.name for f in files] + assert 'file1.md' in file_names + assert 'file2.md' in file_names + assert 
'file4.md' not in file_names + + def test_find_markdown_files_recursive(self): + """Test finding markdown files with recursion.""" + # Create test structure + self.create_test_files({ + 'file1.md': '# Test 1', + 'subdir/file2.md': '# Test 2', + 'subdir/nested/file3.md': '# Test 3', + 'subdir/file4.txt': 'Not markdown' + }) + + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir, recursive=True) + + # Should find all markdown files + assert len(files) == 3 + file_names = [f.name for f in files] + assert 'file1.md' in file_names + assert 'file2.md' in file_names + assert 'file3.md' in file_names + + def test_find_markdown_files_with_depth_limit(self): + """Test recursive search with depth limit.""" + # Create test structure + self.create_test_files({ + 'file1.md': '# Test 1', + 'level1/file2.md': '# Test 2', + 'level1/level2/file3.md': '# Test 3', + 'level1/level2/level3/file4.md': '# Test 4' + }) + + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir, recursive=True, depth=1) + + # Should find files up to depth 1 + assert len(files) == 2 + file_names = [f.name for f in files] + assert 'file1.md' in file_names + assert 'file2.md' in file_names + assert 'file3.md' not in file_names + assert 'file4.md' not in file_names + + def test_find_markdown_files_with_pattern(self): + """Test finding files with custom pattern.""" + # Create test structure + self.create_test_files({ + 'file1.md': '# Test 1', + 'file2.markdown': '# Test 2', + 'file3.txt': 'Not markdown' + }) + + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir, pattern='*.markdown') + + # Should find only .markdown files + assert len(files) == 1 + assert files[0].name == 'file2.markdown' + + def test_find_files_by_glob(self): + """Test glob pattern file finding.""" + # Create test structure + self.create_test_files({ + 'docs/file1.md': '# Test 1', + 'docs/subdir/file2.md': '# Test 2', + 'src/file3.md': '# Test 3', + 
'file4.txt': 'Not markdown' + }) + + processor = BatchProcessor() + + # Test recursive glob + files = processor.find_files_by_glob(str(self.test_dir / "**/*.md")) + assert len(files) == 3 + + # Test specific directory glob + files = processor.find_files_by_glob(str(self.test_dir / "docs/*.md")) + assert len(files) == 1 + assert files[0].name == 'file1.md' + + def test_process_files_success(self): + """Test successful file processing.""" + # Create test files + self.create_test_files({ + 'file1.md': '# Test 1', + 'file2.md': '# Test 2' + }) + + processor = BatchProcessor(show_progress=False) + files = list(self.test_dir.glob('*.md')) + + def mock_processor(file_path): + return ProcessingResult( + file_path=file_path, + success=True, + message="Processed successfully" + ) + + result = processor.process_files(files, mock_processor, "Testing") + + assert result.total_files == 2 + assert result.processed == 2 + assert result.succeeded == 2 + assert result.failed == 0 + assert result.skipped == 0 + + def test_process_files_with_errors(self): + """Test file processing with errors.""" + # Create test files + self.create_test_files({ + 'file1.md': '# Test 1', + 'file2.md': '# Test 2', + 'file3.md': '# Test 3' + }) + + processor = BatchProcessor(show_progress=False, error_handling=ErrorHandling.CONTINUE) + files = list(self.test_dir.glob('*.md')) + + def mock_processor(file_path): + # Fail on file2.md + if file_path.name == 'file2.md': + return ProcessingResult( + file_path=file_path, + success=False, + message="Processing failed", + error="Mock error" + ) + return ProcessingResult( + file_path=file_path, + success=True, + message="Processed successfully" + ) + + result = processor.process_files(files, mock_processor, "Testing") + + assert result.total_files == 3 + assert result.processed == 3 + assert result.succeeded == 2 + assert result.failed == 1 + assert len(result.errors) == 1 + + def test_process_files_stop_on_error(self): + """Test stop-on-error behavior.""" + # 
class TestProgressTracker:
    """Checks that ProgressTracker keeps its counters in step with events."""

    def test_progress_tracking(self):
        """One success, one failure and one skip are each counted once."""
        tracker = ProgressTracker(total=3, show_progress=False)

        def counters():
            # Snapshot of (processed, succeeded, failed) for compact asserts.
            return tracker.processed, tracker.succeeded, tracker.failed

        # A successful result bumps processed and succeeded only.
        tracker.update(ProcessingResult(Path("file1.md"), True, "Success"))
        assert counters() == (1, 1, 0)

        # A failed result bumps processed and failed only.
        tracker.update(
            ProcessingResult(Path("file2.md"), False, "Failed", "Error message")
        )
        assert counters() == (2, 1, 1)

        # Skipping is tracked separately from processing.
        tracker.skip_file(Path("file3.md"), "Skipped reason")
        assert tracker.skipped == 1
mock_db_manager): + """Test file processor for ingestion.""" + # Create test file + test_file = self.test_dir / "test.md" + test_file.write_text("# Test content") + + # Mock database manager + mock_db = Mock() + mock_db_manager.return_value = mock_db + + config = {'database': 'test.db'} + processor = create_file_processor(config, ProcessingMode.INGEST) + + result = processor(test_file) + + assert result.success + assert result.file_path == test_file + assert "Ingested successfully" in result.message + mock_db.store_document.assert_called_once() + + @patch('markitect.database.DatabaseManager') + def test_status_processor(self, mock_db_manager): + """Test file processor for status checking.""" + # Create test file + test_file = self.test_dir / "test.md" + test_file.write_text("# Test content") + + # Mock database manager + mock_db = Mock() + mock_db.get_metadata.return_value = {'id': 'test123'} + mock_db_manager.return_value = mock_db + + config = {'database': 'test.db'} + processor = create_file_processor(config, ProcessingMode.STATUS) + + result = processor(test_file) + + assert result.success + assert result.file_path == test_file + assert "Found in database" in result.message + + def test_validate_processor(self): + """Test file processor for validation.""" + # Create test file + test_file = self.test_dir / "test.md" + test_file.write_text("# Test content") + + config = {} + processor = create_file_processor(config, ProcessingMode.VALIDATE) + + result = processor(test_file) + + assert result.success + assert result.file_path == test_file + assert "Valid markdown" in result.message + + def test_validate_processor_empty_file(self): + """Test validation processor with empty file.""" + # Create empty file + test_file = self.test_dir / "empty.md" + test_file.write_text("") + + config = {} + processor = create_file_processor(config, ProcessingMode.VALIDATE) + + result = processor(test_file) + + assert not result.success + assert "File is empty" in result.error + + 
+class TestCLIIntegration: + """Test CLI command integration.""" + + def setup_method(self): + """Set up test environment.""" + self.temp_dir = tempfile.mkdtemp() + self.test_dir = Path(self.temp_dir) + self.runner = CliRunner() + + def teardown_method(self): + """Clean up test environment.""" + shutil.rmtree(self.temp_dir) + + def create_test_files(self, structure): + """Create test file structure from dict.""" + for path, content in structure.items(): + file_path = self.test_dir / path + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content) + + @patch('markitect.database.DatabaseManager') + def test_ingest_dir_command(self, mock_db_manager): + """Test ingest-dir CLI command.""" + # Create test files + self.create_test_files({ + 'file1.md': '# Test 1', + 'file2.md': '# Test 2', + 'subdir/file3.md': '# Test 3' + }) + + # Mock database + mock_db = Mock() + mock_db_manager.return_value = mock_db + + result = self.runner.invoke(cli, [ + 'ingest-dir', str(self.test_dir), + '--quiet' + ]) + + assert result.exit_code == 0 + # Should process 2 files (non-recursive by default) + assert mock_db.store_document.call_count == 2 + + @patch('markitect.database.DatabaseManager') + def test_ingest_dir_recursive(self, mock_db_manager): + """Test ingest-dir with recursive option.""" + # Create test files + self.create_test_files({ + 'file1.md': '# Test 1', + 'subdir/file2.md': '# Test 2', + 'subdir/nested/file3.md': '# Test 3' + }) + + # Mock database + mock_db = Mock() + mock_db_manager.return_value = mock_db + + result = self.runner.invoke(cli, [ + 'ingest-dir', str(self.test_dir), + '--recursive', + '--quiet' + ]) + + assert result.exit_code == 0 + # Should process all 3 files + assert mock_db.store_document.call_count == 3 + + @patch('markitect.database.DatabaseManager') + def test_batch_process_command(self, mock_db_manager): + """Test batch-process CLI command.""" + # Create test files + self.create_test_files({ + 'docs/file1.md': '# Test 1', + 
'docs/file2.md': '# Test 2', + 'src/file3.md': '# Test 3' + }) + + # Mock database + mock_db = Mock() + mock_db_manager.return_value = mock_db + + # Test glob pattern + pattern = str(self.test_dir / "docs/*.md") + result = self.runner.invoke(cli, [ + 'batch-process', pattern, + '--operation', 'ingest', + '--quiet' + ]) + + assert result.exit_code == 0 + # Should process 2 files from docs directory + assert mock_db.store_document.call_count == 2 + + @patch('markitect.database.DatabaseManager') + def test_recursive_command(self, mock_db_manager): + """Test recursive CLI command.""" + # Create test files + self.create_test_files({ + 'level1/file1.md': '# Test 1', + 'level1/level2/file2.md': '# Test 2', + 'level1/level2/level3/file3.md': '# Test 3' + }) + + # Mock database + mock_db = Mock() + mock_db.get_metadata.side_effect = Exception("Not found") + mock_db_manager.return_value = mock_db + + result = self.runner.invoke(cli, [ + 'recursive', str(self.test_dir), + '--depth', '2', + '--operation', 'status', + '--quiet' + ]) + + assert result.exit_code == 0 + # Should check status for files up to depth 2 + assert mock_db.get_metadata.call_count == 2 + + def test_error_handling_stop(self): + """Test error handling with stop strategy.""" + # Create test directory with no files + result = self.runner.invoke(cli, [ + 'ingest-dir', str(self.test_dir), + '--error-handling', 'stop', + '--quiet' + ]) + + # Should exit cleanly when no files found + assert result.exit_code == 0 + + def test_invalid_directory(self): + """Test handling of invalid directory.""" + result = self.runner.invoke(cli, [ + 'ingest-dir', '/nonexistent/directory', + '--quiet' + ]) + + # Should exit with error + assert result.exit_code == 2 # Click argument validation error + + @patch('markitect.database.DatabaseManager') + def test_custom_pattern(self, mock_db_manager): + """Test custom file pattern matching.""" + # Create test files with different extensions + self.create_test_files({ + 'file1.md': '# Test 
1', + 'file2.markdown': '# Test 2', + 'file3.txt': 'Not markdown' + }) + + # Mock database + mock_db = Mock() + mock_db_manager.return_value = mock_db + + result = self.runner.invoke(cli, [ + 'ingest-dir', str(self.test_dir), + '--pattern', '*.markdown', + '--quiet' + ]) + + assert result.exit_code == 0 + # Should process only .markdown files + assert mock_db.store_document.call_count == 1 + + +class TestErrorHandling: + """Test error handling scenarios.""" + + def setup_method(self): + """Set up test environment.""" + self.temp_dir = tempfile.mkdtemp() + self.test_dir = Path(self.temp_dir) + + def teardown_method(self): + """Clean up test environment.""" + shutil.rmtree(self.temp_dir) + + def test_permission_error_handling(self): + """Test handling of permission errors.""" + processor = BatchProcessor(show_progress=False) + + # Mock Path.iterdir to raise PermissionError + with patch('pathlib.Path.iterdir') as mock_iterdir: + mock_iterdir.side_effect = PermissionError("Permission denied") + + files = processor.find_markdown_files(self.test_dir) + # Should return empty list without crashing + assert files == [] + + def test_nonexistent_directory(self): + """Test handling of nonexistent directories.""" + processor = BatchProcessor() + + with pytest.raises(FileNotFoundError): + processor.find_markdown_files(Path("/nonexistent/directory")) + + def test_file_as_directory(self): + """Test handling when a file is passed as directory.""" + # Create a regular file to pass in place of a directory + test_file = self.test_dir / "test.md" + test_file.write_text("# Test") + + processor = BatchProcessor() + + with pytest.raises(NotADirectoryError): + processor.find_markdown_files(test_file) + + +class TestEdgeCases: + """Test edge cases and boundary conditions.""" + + def setup_method(self): + """Set up test environment.""" + self.temp_dir = tempfile.mkdtemp() + self.test_dir = Path(self.temp_dir) + + def teardown_method(self): + """Clean up test environment.""" + shutil.rmtree(self.temp_dir) + + def 
test_empty_directory(self): + """Test processing empty directory.""" + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir) + assert files == [] + + def test_hidden_directories(self): + """Test that hidden directories are skipped.""" + # Create hidden directory + hidden_dir = self.test_dir / ".hidden" + hidden_dir.mkdir() + (hidden_dir / "test.md").write_text("# Hidden") + + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir, recursive=True) + + # Should not find files in hidden directories + assert len(files) == 0 + + def test_depth_zero(self): + """Test depth=0 behavior.""" + # Create nested structure + (self.test_dir / "file1.md").write_text("# Test 1") + subdir = self.test_dir / "subdir" + subdir.mkdir() + (subdir / "file2.md").write_text("# Test 2") + + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir, recursive=True, depth=0) + + # Depth 0 should only include files in the starting directory + # so only file1.md is expected; subdir/file2.md must be excluded + assert len(files) == 1 + assert files[0].name == "file1.md" + + def test_very_deep_structure(self): + """Test with very deep directory structure.""" + # Create a root-level file plus a 10-level deep chain of directories, + # each level holding one Markdown file + (self.test_dir / "file_root.md").write_text("# Root Test") + + current_dir = self.test_dir + for i in range(10): + current_dir = current_dir / f"level{i}" + current_dir.mkdir() + (current_dir / f"file{i}.md").write_text(f"# Test {i}") + + processor = BatchProcessor() + files = processor.find_markdown_files(self.test_dir, recursive=True, depth=5) + + # Should find files up to depth 5 + # Root (depth 0) + levels 0-4 (depths 1-5) = 6 files + assert len(files) == 6 + + def test_glob_with_no_matches(self): + """Test glob pattern with no matches.""" + processor = BatchProcessor() + files = processor.find_files_by_glob(str(self.test_dir / "*.nonexistent")) + assert files == [] + + def 
test_file_deleted_during_processing(self): + """Test handling file deletion during processing.""" + # Create test file + test_file = self.test_dir / "test.md" + test_file.write_text("# Test") + + def mock_processor(file_path): + # The existence check under test is expected to live in the process_files loop, + # not in this processor function, which always reports success + return ProcessingResult(file_path, True, "Processed") + + processor = BatchProcessor(show_progress=False) + files = [test_file] + + # Delete the file after creating the file list but before processing + test_file.unlink() + + result = processor.process_files(files, mock_processor, "Testing") + + # Should handle gracefully: the missing file counts as skipped, not processed + assert result.skipped == 1 + assert result.processed == 0 \ No newline at end of file