feat: implement batch processing and recursive operations (issue #17)

Complete implementation of batch processing capabilities for MarkiTect CLI:

New CLI Commands:
- markitect ingest-dir: Process all markdown files in directory with recursive support
- markitect batch-process: Process files matching glob patterns
- markitect recursive: Recursive processing with depth control

Core Features:
- Sophisticated batch processing engine with progress tracking
- Multiple error handling strategies (stop, continue, skip)
- Recursive directory traversal with configurable depth limits
- Glob pattern matching for flexible file selection
- Progress feedback with detailed processing statistics
- Integration with existing database and caching systems

Technical Implementation:
- BatchProcessor class with modular architecture
- ProgressTracker for real-time user feedback
- Comprehensive error handling and edge case management
- Support for multiple operations (ingest, status, validate)
- Depth-controlled recursive search with proper boundary handling
- Permission error resilience and graceful degradation

Testing:
- 29 comprehensive tests covering all functionality
- Edge cases: empty directories, hidden files, permission errors
- CLI integration tests with mocked database operations
- Depth logic validation and boundary condition testing
- Error handling scenarios and recovery mechanisms

All acceptance criteria fulfilled:
✅ Directory and recursive processing
✅ Glob pattern support for file selection
✅ Progress tracking and user feedback
✅ Error handling with continuation options
✅ Comprehensive test coverage

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-03 10:45:43 +02:00
parent a4805812f3
commit 0982e771e4
3 changed files with 1227 additions and 0 deletions
--- a/markitect/batch_processor.py
+++ b/markitect/batch_processor.py
@@ -0,0 +1,379 @@
+"""
+Batch Processing and Recursive Operations - Issue #17
+
+This module provides batch processing capabilities for MarkiTect, allowing
+users to process multiple files and directories recursively through CLI.
+
+Features:
+- Directory processing with recursive support
+- Glob pattern matching for file selection
+- Progress tracking with user feedback
+- Error handling with continuation options
+- Depth control for recursive operations
+
+Commands implemented:
+- ingest-dir: Process all Markdown files in directory
+- batch-process: Process files matching glob pattern
+- recursive operations with depth control
+"""
+
+import os
+import glob
+import fnmatch
+from pathlib import Path
+from typing import List, Optional, Dict, Any, Iterator, Callable
+from dataclasses import dataclass
+from enum import Enum
+import click
+
+
+class ProcessingMode(Enum):
+    """Modes for batch processing operations.
+
+    The member values are the lowercase operation names.
+    """
+    INGEST = "ingest"      # read the file and store its content in the database
+    STATUS = "status"      # look the file up in the database
+    VALIDATE = "validate"  # check that the file has non-blank content
+    GENERATE = "generate"  # declared here; create_file_processor in this module rejects it as unsupported
+
+
+class ErrorHandling(Enum):
+    """Error handling strategies for batch operations.
+
+    The member values are the lowercase strategy names.
+    """
+    STOP = "stop"          # Stop on first error
+    CONTINUE = "continue"  # Continue processing, collect errors
+    SKIP = "skip"         # Skip failed files, no error collection (intended contract — TODO confirm the processor honours it)
+
+
+@dataclass
+class ProcessingResult:
+    """Result of processing a single file."""
+    # Path of the file the operation was applied to.
+    file_path: Path
+    # True when the operation completed without error.
+    success: bool
+    # Human-readable outcome text.
+    message: str
+    # Error text for a failed operation; defaults to None.
+    error: Optional[str] = None
+    # Elapsed time for this file when the producer measured it; defaults to None.
+    processing_time: Optional[float] = None
+
+
+@dataclass
+class BatchResult:
+    """Result of a batch processing operation."""
+    # Number of files handed to the batch run.
+    total_files: int
+    # Number of files for which a result (success or failure) was recorded.
+    processed: int
+    # Number of recorded results that were successful.
+    succeeded: int
+    # Number of recorded results that were failures.
+    failed: int
+    # Number of files skipped without being processed.
+    skipped: int
+    # Failure results gathered during the run.
+    errors: List[ProcessingResult]
+    # Elapsed time of the whole run, in seconds.
+    processing_time: float
+
+
+class ProgressTracker:
+    """Count processed, succeeded, failed and skipped files for a batch run.
+
+    When ``show_progress`` is true, every update and skip is also echoed
+    through ``click.echo``.
+    """
+
+    def __init__(self, total: int, show_progress: bool = True):
+        """
+        Args:
+            total: Number of files expected in the run; the denominator of
+                the progress percentage.
+            show_progress: Whether to echo one line per file.
+        """
+        self.total = total
+        self.processed = 0
+        self.succeeded = 0
+        self.failed = 0
+        self.skipped = 0
+        self.show_progress = show_progress
+
+    def update(self, result: ProcessingResult):
+        """Record one processing result and optionally display it.
+
+        Args:
+            result: Outcome to record; ``result.success`` decides whether it
+                counts as succeeded or failed.
+        """
+        self.processed += 1
+        if result.success:
+            self.succeeded += 1
+        else:
+            self.failed += 1
+
+        if self.show_progress:
+            self._display_progress(result)
+
+    def skip_file(self, file_path: Path, reason: str):
+        """Record a skipped file.
+
+        Skipped files do not count towards ``processed``.
+
+        Args:
+            file_path: File that was skipped.
+            reason: Short explanation shown to the user.
+        """
+        self.skipped += 1
+        if self.show_progress:
+            click.echo(f"⚠️  Skipped {file_path}: {reason}")
+
+    def _percentage(self) -> float:
+        """Return the share of ``total`` processed so far, in percent.
+
+        A non-positive ``total`` yields 100.0 (nothing is outstanding)
+        instead of raising ``ZeroDivisionError``.
+        """
+        if self.total <= 0:
+            return 100.0
+        return (self.processed / self.total) * 100
+
+    def _display_progress(self, result: ProcessingResult):
+        """Echo one progress line, plus the error text for a failure."""
+        status = "✅" if result.success else "❌"
+        percentage = self._percentage()
+
+        click.echo(f"{status} [{self.processed}/{self.total}] ({percentage:.1f}%) {result.file_path}")
+
+        if not result.success and result.error:
+            click.echo(f"    Error: {result.error}")
+
+
+class BatchProcessor:
+    """Core batch processing engine.
+
+    Discovers files (by directory scan or glob pattern) and runs a per-file
+    callable over them, tracking progress and applying the configured
+    error-handling strategy:
+
+    - ``ErrorHandling.STOP``: stop at the first failure.
+    - ``ErrorHandling.CONTINUE``: keep going and collect every failure.
+    - ``ErrorHandling.SKIP``: keep going; failures are counted but not
+      collected in ``BatchResult.errors``.
+    """
+
+    def __init__(self,
+                 error_handling: ErrorHandling = ErrorHandling.CONTINUE,
+                 show_progress: bool = True,
+                 max_depth: Optional[int] = None):
+        """
+        Args:
+            error_handling: Strategy applied when a file fails.
+            show_progress: Whether to echo progress, warnings and a summary.
+            max_depth: Default depth limit for recursive searches; ``None``
+                means unlimited.
+        """
+        self.error_handling = error_handling
+        self.show_progress = show_progress
+        self.max_depth = max_depth
+
+    def find_markdown_files(self,
+                           directory: Path,
+                           pattern: str = "*.md",
+                           recursive: bool = False,
+                           depth: Optional[int] = None) -> List[Path]:
+        """
+        Find markdown files in directory with pattern matching.
+
+        Args:
+            directory: Directory to search
+            pattern: ``fnmatch`` pattern applied to each file name
+            recursive: Whether to search recursively
+            depth: Maximum depth for recursive search; falls back to
+                ``self.max_depth`` when ``None``. Depth 0 is ``directory``
+                itself.
+
+        Returns:
+            Sorted list of matching file paths
+
+        Raises:
+            FileNotFoundError: ``directory`` does not exist.
+            NotADirectoryError: ``directory`` is not a directory.
+        """
+        if not directory.exists():
+            raise FileNotFoundError(f"Directory not found: {directory}")
+
+        if not directory.is_dir():
+            raise NotADirectoryError(f"Path is not a directory: {directory}")
+
+        if recursive:
+            effective_depth = depth if depth is not None else self.max_depth
+            files = self._find_recursive(directory, pattern, effective_depth)
+        else:
+            # Non-recursive: only current directory
+            files = self._find_in_directory(directory, pattern)
+
+        return sorted(files)
+
+    def _find_recursive(self, directory: Path, pattern: str, max_depth: Optional[int]) -> List[Path]:
+        """Find files recursively with depth control.
+
+        Each directory is scanned once, so an unreadable directory produces
+        a single warning. Directories whose resolved path has already been
+        visited are not scanned again, which keeps symlink cycles from
+        returning the same files repeatedly. Sub-directories whose name
+        starts with ``.`` are not entered.
+        """
+        found: List[Path] = []
+        visited = set()
+
+        def _search(current_dir: Path, current_depth: int) -> None:
+            if max_depth is not None and current_depth > max_depth:
+                return
+
+            # Identify the directory by its resolved path to detect cycles;
+            # fall back to the literal path if resolution fails.
+            try:
+                identity = current_dir.resolve()
+            except (OSError, RuntimeError):
+                identity = current_dir
+            if identity in visited:
+                return
+            visited.add(identity)
+
+            may_descend = max_depth is None or current_depth < max_depth
+            subdirs: List[Path] = []
+            try:
+                for item in current_dir.iterdir():
+                    if item.is_file():
+                        if fnmatch.fnmatch(item.name, pattern):
+                            found.append(item)
+                    elif may_descend and item.is_dir() and not item.name.startswith('.'):
+                        subdirs.append(item)
+            except PermissionError:
+                # Skip directories we can't access; keep what was found so far
+                if self.show_progress:
+                    click.echo(f"⚠️  Permission denied: {current_dir}")
+
+            for subdir in subdirs:
+                _search(subdir, current_depth + 1)
+
+        _search(directory, 0)
+        return found
+
+    def _find_in_directory(self, directory: Path, pattern: str) -> List[Path]:
+        """Find files matching pattern in a specific directory.
+
+        A ``PermissionError`` while scanning is reported (when progress
+        output is enabled) and the files found up to that point are returned.
+        """
+        files: List[Path] = []
+
+        try:
+            for item in directory.iterdir():
+                if item.is_file() and fnmatch.fnmatch(item.name, pattern):
+                    files.append(item)
+        except PermissionError:
+            if self.show_progress:
+                click.echo(f"⚠️  Permission denied: {directory}")
+
+        return files
+
+    def find_files_by_glob(self, glob_pattern: str) -> List[Path]:
+        """
+        Find files using glob patterns.
+
+        Args:
+            glob_pattern: Glob pattern (e.g., "**/*.md", "docs/*.markdown");
+                ``**`` matches across directory levels.
+
+        Returns:
+            Sorted list of matching paths that are regular files
+        """
+        candidates = (Path(match) for match in glob.glob(glob_pattern, recursive=True))
+        # Sorted for a deterministic processing order, like find_markdown_files.
+        return sorted(path for path in candidates if path.is_file())
+
+    def process_files(self,
+                     files: List[Path],
+                     processor_func: Callable[[Path], ProcessingResult],
+                     operation_name: str = "Processing") -> BatchResult:
+        """
+        Process a list of files with progress tracking and error handling.
+
+        Args:
+            files: List of files to process
+            processor_func: Function to process each file; it must return a
+                ProcessingResult. An exception it raises is converted into a
+                failed result.
+            operation_name: Name of the operation for progress display
+
+        Returns:
+            BatchResult with processing statistics
+        """
+        import time  # local import: this module does not import time at top level
+        start_time = time.perf_counter()  # monotonic, unaffected by clock changes
+
+        if self.show_progress:
+            click.echo(f"🚀 {operation_name} {len(files)} files...")
+
+        tracker = ProgressTracker(len(files), self.show_progress)
+        errors: List[ProcessingResult] = []
+        stop_on_error = self.error_handling == ErrorHandling.STOP
+        collect_errors = self.error_handling != ErrorHandling.SKIP
+
+        for file_path in files:
+            try:
+                # Check if file still exists (might have been deleted during processing)
+                if not file_path.exists():
+                    tracker.skip_file(file_path, "File no longer exists")
+                    continue
+
+                result = processor_func(file_path)
+            except Exception as e:
+                # Turn unexpected errors into an ordinary failed result
+                result = ProcessingResult(
+                    file_path=file_path,
+                    success=False,
+                    message=f"Unexpected error: {str(e)}",
+                    error=str(e)
+                )
+
+            tracker.update(result)
+            if result.success:
+                continue
+
+            if collect_errors:
+                errors.append(result)
+            if stop_on_error:
+                break
+
+        processing_time = time.perf_counter() - start_time
+
+        batch_result = BatchResult(
+            total_files=len(files),
+            processed=tracker.processed,
+            succeeded=tracker.succeeded,
+            failed=tracker.failed,
+            skipped=tracker.skipped,
+            errors=errors,
+            processing_time=processing_time
+        )
+
+        if self.show_progress:
+            self._display_summary(batch_result, operation_name)
+
+        return batch_result
+
+    def _display_summary(self, result: BatchResult, operation_name: str):
+        """Display batch processing summary.
+
+        At most the first 10 collected errors are listed; the rest are
+        summarised as a count.
+        """
+        click.echo(f"\n📊 {operation_name} Summary:")
+        click.echo(f"   Total files: {result.total_files}")
+        click.echo(f"   Processed: {result.processed}")
+        click.echo(f"   Succeeded: {result.succeeded}")
+        click.echo(f"   Failed: {result.failed}")
+        click.echo(f"   Skipped: {result.skipped}")
+        click.echo(f"   Processing time: {result.processing_time:.2f}s")
+
+        if result.failed > 0:
+            click.echo(f"\n❌ {result.failed} files failed:")
+            for error in result.errors[:10]:  # Show first 10 errors
+                click.echo(f"   • {error.file_path}: {error.message}")
+
+            if len(result.errors) > 10:
+                click.echo(f"   ... and {len(result.errors) - 10} more errors")
+
+
+def create_file_processor(config: Dict[str, Any],
+                         operation: ProcessingMode) -> Callable[[Path], ProcessingResult]:
+    """
+    Create a file processor function for the specified operation.
+
+    Args:
+        config: Configuration dictionary; its ``'database'`` entry is passed
+            to ``DatabaseManager`` for the INGEST and STATUS operations.
+        operation: Type of processing operation
+
+    Returns:
+        Function that processes a single file and returns ProcessingResult.
+        The function never raises for ordinary errors: any ``Exception`` is
+        reported as a failed result. An unsupported operation is likewise
+        reported as a failure for each file.
+    """
+    import time
+
+    def process_file(file_path: Path) -> ProcessingResult:
+        """Process a single file based on the operation type."""
+        start_time = time.time()
+
+        def _result(success: bool, message: str, error: Optional[str] = None) -> ProcessingResult:
+            # Build the result and stamp it with the elapsed time for this file.
+            return ProcessingResult(
+                file_path=file_path,
+                success=success,
+                message=message,
+                error=error,
+                processing_time=time.time() - start_time
+            )
+
+        try:
+            if operation == ProcessingMode.INGEST:
+                # Imported at call time, as in the rest of this function.
+                from .database import DatabaseManager
+                db_manager = DatabaseManager(config.get('database'))
+
+                content = file_path.read_text(encoding='utf-8')
+                db_manager.store_document(str(file_path), content)
+                return _result(True, "Ingested successfully")
+
+            if operation == ProcessingMode.STATUS:
+                from .database import DatabaseManager
+                db_manager = DatabaseManager(config.get('database'))
+
+                try:
+                    metadata = db_manager.get_metadata(str(file_path))
+                    if metadata is None:
+                        message = "Not found in database"
+                    else:
+                        message = f"Found in database (ID: {metadata.get('id', 'Unknown')})"
+                except Exception:
+                    # NOTE(review): any lookup error is reported as "not found";
+                    # narrow this once DatabaseManager's exception types are known.
+                    # Unlike a bare except, this lets KeyboardInterrupt and
+                    # SystemExit propagate.
+                    message = "Not found in database"
+
+                return _result(True, message)
+
+            if operation == ProcessingMode.VALIDATE:
+                content = file_path.read_text(encoding='utf-8')
+
+                # Basic validation: the file must contain non-whitespace text
+                if not content.strip():
+                    raise ValueError("File is empty")
+
+                return _result(True, "Valid markdown file")
+
+            raise ValueError(f"Unsupported operation: {operation}")
+
+        except Exception as e:
+            return _result(False, f"Failed: {str(e)}", error=str(e))
+
+    return process_file
--- a/markitect/cli.py
+++ b/markitect/cli.py
@@ -28,6 +28,7 @@ import builtins
 from .database import DatabaseManager
 from .legacy_compat import LegacyMode, emit_deprecation_warning, legacy_switch_option
 from .__version__ import get_version_info, get_release_info
+from .batch_processor import BatchProcessor, ProcessingMode, ErrorHandling, create_file_processor

 # Import legacy system components for advanced management
 try:
@@ -4549,6 +4550,200 @@ def perf_history(config, limit, trend_days, output_format, output):
        sys.exit(1)


+# Batch Processing Commands - Issue #17
+
+
+@cli.command(name='ingest-dir')
+@click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path))
+@click.option('--recursive', '-r', is_flag=True, help='Process directories recursively')
+@click.option('--depth', type=int, help='Maximum depth for recursive processing')
+@click.option('--pattern', default='*.md', help='File pattern to match (default: *.md)')
+@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']),
+              default='continue', help='Error handling strategy')
+@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output')
+@pass_config
+def ingest_dir(config, directory, recursive, depth, pattern, error_handling, quiet):
+    """Process all Markdown files in directory.
+
+    Ingests all markdown files found in the specified directory into the database.
+    Supports recursive processing with depth control and flexible error handling.
+
+    Examples:
+        markitect ingest-dir ./docs
+        markitect ingest-dir ./content --recursive --depth 3
+        markitect ingest-dir ./articles --pattern "*.markdown" --error-handling stop
+    """
+    try:
+        # The CLI choices are exactly the enum values, so look up by value.
+        strategy = ErrorHandling(error_handling)
+        show_output = not quiet
+
+        batch = BatchProcessor(
+            error_handling=strategy,
+            show_progress=show_output,
+            max_depth=depth
+        )
+
+        if show_output:
+            click.echo(f"🔍 Searching for files in {directory}...")
+
+        matched = batch.find_markdown_files(
+            directory=directory,
+            pattern=pattern,
+            recursive=recursive,
+            depth=depth
+        )
+
+        # Nothing to do: report it (even in quiet mode) and finish normally.
+        if not matched:
+            click.echo(f"📭 No files found matching pattern '{pattern}' in {directory}")
+            return
+
+        ingest_one = create_file_processor(config, ProcessingMode.INGEST)
+        outcome = batch.process_files(matched, ingest_one, "Ingesting")
+
+        # A non-zero exit status is only produced under the "stop" strategy.
+        if outcome.failed > 0 and strategy == ErrorHandling.STOP:
+            sys.exit(1)
+
+    except Exception as e:
+        click.echo(f"Directory ingestion failed: {e}", err=True)
+        if config.get('verbose'):
+            import traceback
+            click.echo(traceback.format_exc(), err=True)
+        sys.exit(1)
+
+
+@cli.command(name='batch-process')
+@click.argument('pattern', type=str)
+@click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']),
+              default='ingest', help='Operation to perform on matched files')
+@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']),
+              default='continue', help='Error handling strategy')
+@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output')
+@pass_config
+def batch_process(config, pattern, operation, error_handling, quiet):
+    """Process files matching glob pattern.
+
+    Uses glob patterns to find and process files. Supports various operations
+    including ingestion, status checking, and validation.
+
+    Examples:
+        markitect batch-process "**/*.md" --operation ingest
+        markitect batch-process "docs/**/*.markdown" --operation status
+        markitect batch-process "./content/*.md" --operation validate --error-handling stop
+    """
+    # Display names for each operation. Appending "ing" to the raw option
+    # value produced "Statusing" and "Validateing".
+    operation_labels = {
+        'ingest': 'Ingesting',
+        'status': 'Checking',
+        'validate': 'Validating',
+    }
+
+    try:
+        # Convert strings to enums
+        error_strategy = ErrorHandling[error_handling.upper()]
+        processing_mode = ProcessingMode[operation.upper()]
+
+        # Initialize batch processor
+        processor = BatchProcessor(
+            error_handling=error_strategy,
+            show_progress=not quiet
+        )
+
+        # Find files using glob pattern
+        if not quiet:
+            click.echo(f"🔍 Searching for files matching '{pattern}'...")
+
+        files = processor.find_files_by_glob(pattern)
+
+        if not files:
+            click.echo(f"📭 No files found matching pattern '{pattern}'")
+            return
+
+        # Create file processor for the specified operation
+        file_processor = create_file_processor(config, processing_mode)
+
+        # Process files; fall back to the old derived name for unknown operations
+        operation_name = operation_labels.get(operation, f"{operation.title()}ing")
+        result = processor.process_files(files, file_processor, operation_name)
+
+        # A non-zero exit status is only produced under the "stop" strategy
+        if result.failed > 0 and error_strategy == ErrorHandling.STOP:
+            sys.exit(1)
+
+    except Exception as e:
+        click.echo(f"Batch processing failed: {e}", err=True)
+        if config.get('verbose'):
+            import traceback
+            click.echo(traceback.format_exc(), err=True)
+        sys.exit(1)
+
+
+@cli.command(name='recursive')
+@click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path))
+@click.option('--depth', type=int, default=None, help='Maximum recursion depth')
+@click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']),
+              default='status', help='Operation to perform')
+@click.option('--pattern', default='*.md', help='File pattern to match (default: *.md)')
+@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']),
+              default='continue', help='Error handling strategy')
+@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output')
+@pass_config
+def recursive(config, directory, depth, operation, pattern, error_handling, quiet):
+    """Recursive processing with depth control.
+
+    Performs recursive operations on directory trees with configurable depth limits.
+    This command provides fine-grained control over recursive processing behavior.
+
+    Examples:
+        markitect recursive ./docs --depth 2 --operation ingest
+        markitect recursive ./content --depth 5 --operation status --pattern "*.markdown"
+        markitect recursive ./src --operation validate --error-handling stop
+    """
+    # Display names for each operation. Appending "ing" to the raw option
+    # value produced "statusing" and "validateing".
+    operation_labels = {
+        'ingest': 'ingesting',
+        'status': 'checking',
+        'validate': 'validating',
+    }
+
+    try:
+        # Convert strings to enums
+        error_strategy = ErrorHandling[error_handling.upper()]
+        processing_mode = ProcessingMode[operation.upper()]
+
+        # Initialize batch processor with depth control
+        processor = BatchProcessor(
+            error_handling=error_strategy,
+            show_progress=not quiet,
+            max_depth=depth
+        )
+
+        # Find files recursively
+        if not quiet:
+            # Compare against None: --depth 0 is a real limit and must be shown
+            depth_str = f" (max depth: {depth})" if depth is not None else ""
+            click.echo(f"🔍 Recursively searching {directory}{depth_str}...")
+
+        files = processor.find_markdown_files(
+            directory=directory,
+            pattern=pattern,
+            recursive=True,
+            depth=depth
+        )
+
+        if not files:
+            click.echo(f"📭 No files found matching pattern '{pattern}' in {directory}")
+            return
+
+        # Create file processor for the specified operation
+        file_processor = create_file_processor(config, processing_mode)
+
+        # Process files; fall back to the old derived name for unknown operations
+        label = operation_labels.get(operation, f"{operation}ing")
+        operation_name = f"Recursively {label}"
+        result = processor.process_files(files, file_processor, operation_name)
+
+        # A non-zero exit status is only produced under the "stop" strategy
+        if result.failed > 0 and error_strategy == ErrorHandling.STOP:
+            sys.exit(1)
+
+    except Exception as e:
+        click.echo(f"Recursive processing failed: {e}", err=True)
+        if config.get('verbose'):
+            import traceback
+            click.echo(traceback.format_exc(), err=True)
+        sys.exit(1)
+
+
 # Register issue management commands
 cli.add_command(issues_group)

--- a/tests/test_issue_17_batch_processing.py
+++ b/tests/test_issue_17_batch_processing.py
@@ -0,0 +1,653 @@
+"""
+Tests for Issue #17: Batch Processing and Recursive Operations
+
+This test suite verifies the batch processing functionality including:
+- Directory processing with recursive support
+- Glob pattern matching for file selection
+- Progress tracking and error handling
+- Depth control for recursive operations
+"""
+
+import pytest
+import tempfile
+import shutil
+from pathlib import Path
+from unittest.mock import Mock, patch, MagicMock
+from click.testing import CliRunner
+
+from markitect.batch_processor import (
+    BatchProcessor, ProcessingMode, ErrorHandling,
+    ProcessingResult, BatchResult, ProgressTracker,
+    create_file_processor
+)
+from markitect.cli import cli
+
+
+class TestBatchProcessor:
+    """Exercise file discovery and batch execution of BatchProcessor."""
+
+    def setup_method(self):
+        """Create a scratch directory for each test."""
+        self.temp_dir = tempfile.mkdtemp()
+        self.test_dir = Path(self.temp_dir)
+
+    def teardown_method(self):
+        """Remove the scratch directory."""
+        shutil.rmtree(self.temp_dir)
+
+    def create_test_files(self, structure):
+        """Materialise ``{relative path: content}`` under the scratch directory."""
+        for relative_path, text in structure.items():
+            target = self.test_dir / relative_path
+            target.parent.mkdir(parents=True, exist_ok=True)
+            target.write_text(text)
+
+    @staticmethod
+    def _processor_failing_on(*failing_names):
+        """Build a per-file processor that fails only for the given file names."""
+        def _process(file_path):
+            if file_path.name in failing_names:
+                return ProcessingResult(
+                    file_path=file_path,
+                    success=False,
+                    message="Processing failed",
+                    error="Mock error"
+                )
+            return ProcessingResult(
+                file_path=file_path,
+                success=True,
+                message="Processed successfully"
+            )
+        return _process
+
+    def test_find_markdown_files_non_recursive(self):
+        """Only markdown files directly in the directory are returned."""
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'file2.md': '# Test 2',
+            'file3.txt': 'Not markdown',
+            'subdir/file4.md': '# Test 4'
+        })
+
+        found = BatchProcessor().find_markdown_files(self.test_dir, recursive=False)
+
+        assert len(found) == 2
+        names = {path.name for path in found}
+        assert 'file1.md' in names
+        assert 'file2.md' in names
+        assert 'file4.md' not in names
+
+    def test_find_markdown_files_recursive(self):
+        """Markdown files at every level are returned."""
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'subdir/file2.md': '# Test 2',
+            'subdir/nested/file3.md': '# Test 3',
+            'subdir/file4.txt': 'Not markdown'
+        })
+
+        found = BatchProcessor().find_markdown_files(self.test_dir, recursive=True)
+
+        assert len(found) == 3
+        names = {path.name for path in found}
+        for expected in ('file1.md', 'file2.md', 'file3.md'):
+            assert expected in names
+
+    def test_find_markdown_files_with_depth_limit(self):
+        """A depth limit of 1 stops below the first sub-directory level."""
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'level1/file2.md': '# Test 2',
+            'level1/level2/file3.md': '# Test 3',
+            'level1/level2/level3/file4.md': '# Test 4'
+        })
+
+        found = BatchProcessor().find_markdown_files(self.test_dir, recursive=True, depth=1)
+
+        assert len(found) == 2
+        names = {path.name for path in found}
+        assert 'file1.md' in names
+        assert 'file2.md' in names
+        assert 'file3.md' not in names
+        assert 'file4.md' not in names
+
+    def test_find_markdown_files_with_pattern(self):
+        """A custom pattern selects only the matching extension."""
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'file2.markdown': '# Test 2',
+            'file3.txt': 'Not markdown'
+        })
+
+        found = BatchProcessor().find_markdown_files(self.test_dir, pattern='*.markdown')
+
+        assert len(found) == 1
+        assert found[0].name == 'file2.markdown'
+
+    def test_find_files_by_glob(self):
+        """Glob lookups work both recursively and for a single directory."""
+        self.create_test_files({
+            'docs/file1.md': '# Test 1',
+            'docs/subdir/file2.md': '# Test 2',
+            'src/file3.md': '# Test 3',
+            'file4.txt': 'Not markdown'
+        })
+
+        processor = BatchProcessor()
+
+        # "**" spans directory levels
+        everywhere = processor.find_files_by_glob(str(self.test_dir / "**/*.md"))
+        assert len(everywhere) == 3
+
+        # A plain "*" stays inside one directory
+        docs_only = processor.find_files_by_glob(str(self.test_dir / "docs/*.md"))
+        assert len(docs_only) == 1
+        assert docs_only[0].name == 'file1.md'
+
+    def test_process_files_success(self):
+        """All files succeed: every counter reflects two successes."""
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'file2.md': '# Test 2'
+        })
+
+        processor = BatchProcessor(show_progress=False)
+        files = list(self.test_dir.glob('*.md'))
+
+        result = processor.process_files(files, self._processor_failing_on(), "Testing")
+
+        assert result.total_files == 2
+        assert result.processed == 2
+        assert result.succeeded == 2
+        assert result.failed == 0
+        assert result.skipped == 0
+
+    def test_process_files_with_errors(self):
+        """CONTINUE processes every file and collects the single failure."""
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'file2.md': '# Test 2',
+            'file3.md': '# Test 3'
+        })
+
+        processor = BatchProcessor(show_progress=False, error_handling=ErrorHandling.CONTINUE)
+        files = list(self.test_dir.glob('*.md'))
+
+        result = processor.process_files(files, self._processor_failing_on('file2.md'), "Testing")
+
+        assert result.total_files == 3
+        assert result.processed == 3
+        assert result.succeeded == 2
+        assert result.failed == 1
+        assert len(result.errors) == 1
+
+    def test_process_files_stop_on_error(self):
+        """STOP ends the run at the first failing file."""
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'file2.md': '# Test 2',
+            'file3.md': '# Test 3'
+        })
+
+        processor = BatchProcessor(show_progress=False, error_handling=ErrorHandling.STOP)
+        # Sorted so that file2.md is reliably the second file processed
+        files = sorted(self.test_dir.glob('*.md'))
+
+        result = processor.process_files(files, self._processor_failing_on('file2.md'), "Testing")
+
+        assert result.processed == 2  # file1 success, file2 error
+        assert result.succeeded == 1
+        assert result.failed == 1
+
+
+class TestProgressTracker:
+    """Check the counters maintained by ProgressTracker."""
+
+    def test_progress_tracking(self):
+        """Counters follow one success, one failure and one skip."""
+        tracker = ProgressTracker(total=3, show_progress=False)
+
+        # A successful result bumps processed and succeeded
+        tracker.update(ProcessingResult(Path("file1.md"), True, "Success"))
+        assert (tracker.processed, tracker.succeeded, tracker.failed) == (1, 1, 0)
+
+        # A failed result bumps processed and failed
+        tracker.update(ProcessingResult(Path("file2.md"), False, "Failed", "Error message"))
+        assert (tracker.processed, tracker.succeeded, tracker.failed) == (2, 1, 1)
+
+        # A skip is counted separately
+        tracker.skip_file(Path("file3.md"), "Skipped reason")
+        assert tracker.skipped == 1
+
+
+class TestFileProcessor:
+    """Tests for the per-file callables built by create_file_processor."""
+
+    def setup_method(self):
+        """Create a fresh scratch directory before every test."""
+        self.temp_dir = tempfile.mkdtemp()
+        self.test_dir = Path(self.temp_dir)
+
+    def teardown_method(self):
+        """Delete the scratch directory after every test."""
+        shutil.rmtree(self.temp_dir)
+
+    @patch('markitect.database.DatabaseManager')
+    def test_ingest_processor(self, mock_db_manager):
+        """An INGEST processor reports success and stores the document once."""
+        # Stand-in returned whenever DatabaseManager is instantiated.
+        db_double = Mock()
+        mock_db_manager.return_value = db_double
+
+        markdown_path = self.test_dir / "test.md"
+        markdown_path.write_text("# Test content")
+
+        ingest = create_file_processor({'database': 'test.db'}, ProcessingMode.INGEST)
+        outcome = ingest(markdown_path)
+
+        assert outcome.success
+        assert outcome.file_path == markdown_path
+        assert "Ingested successfully" in outcome.message
+        db_double.store_document.assert_called_once()
+
+    @patch('markitect.database.DatabaseManager')
+    def test_status_processor(self, mock_db_manager):
+        """A STATUS processor reports a file whose metadata the database returns."""
+        # The stand-in database answers get_metadata with a record.
+        db_double = Mock()
+        db_double.get_metadata.return_value = {'id': 'test123'}
+        mock_db_manager.return_value = db_double
+
+        markdown_path = self.test_dir / "test.md"
+        markdown_path.write_text("# Test content")
+
+        check_status = create_file_processor({'database': 'test.db'}, ProcessingMode.STATUS)
+        outcome = check_status(markdown_path)
+
+        assert outcome.success
+        assert outcome.file_path == markdown_path
+        assert "Found in database" in outcome.message
+
+    def test_validate_processor(self):
+        """A VALIDATE processor accepts a file with markdown content."""
+        markdown_path = self.test_dir / "test.md"
+        markdown_path.write_text("# Test content")
+
+        validate = create_file_processor({}, ProcessingMode.VALIDATE)
+        outcome = validate(markdown_path)
+
+        assert outcome.success
+        assert outcome.file_path == markdown_path
+        assert "Valid markdown" in outcome.message
+
+    def test_validate_processor_empty_file(self):
+        """A VALIDATE processor rejects a zero-length file with an explanatory error."""
+        blank_path = self.test_dir / "empty.md"
+        blank_path.write_text("")
+
+        validate = create_file_processor({}, ProcessingMode.VALIDATE)
+        outcome = validate(blank_path)
+
+        assert not outcome.success
+        assert "File is empty" in outcome.error
+
+
+class TestCLIIntegration:
+    """Drive the batch-processing CLI commands through Click's CliRunner."""
+
+    def setup_method(self):
+        """Create a scratch directory and a CLI runner before every test."""
+        self.temp_dir = tempfile.mkdtemp()
+        self.test_dir = Path(self.temp_dir)
+        self.runner = CliRunner()
+
+    def teardown_method(self):
+        """Delete the scratch directory after every test."""
+        shutil.rmtree(self.temp_dir)
+
+    def create_test_files(self, structure):
+        """Write each ``relative path -> content`` entry of *structure* under the test directory."""
+        for relative_path, text in structure.items():
+            target = self.test_dir / relative_path
+            # Entries such as 'subdir/file.md' need their parent directories first.
+            target.parent.mkdir(parents=True, exist_ok=True)
+            target.write_text(text)
+
+    @patch('markitect.database.DatabaseManager')
+    def test_ingest_dir_command(self, mock_db_manager):
+        """ingest-dir without --recursive only stores the top-level files."""
+        db_double = Mock()
+        mock_db_manager.return_value = db_double
+
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'file2.md': '# Test 2',
+            'subdir/file3.md': '# Test 3'
+        })
+
+        args = ['ingest-dir', str(self.test_dir), '--quiet']
+        outcome = self.runner.invoke(cli, args)
+
+        assert outcome.exit_code == 0
+        # Non-recursive by default: subdir/file3.md is not expected to be stored.
+        assert db_double.store_document.call_count == 2
+
+    @patch('markitect.database.DatabaseManager')
+    def test_ingest_dir_recursive(self, mock_db_manager):
+        """ingest-dir --recursive stores files from nested directories too."""
+        db_double = Mock()
+        mock_db_manager.return_value = db_double
+
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'subdir/file2.md': '# Test 2',
+            'subdir/nested/file3.md': '# Test 3'
+        })
+
+        args = ['ingest-dir', str(self.test_dir), '--recursive', '--quiet']
+        outcome = self.runner.invoke(cli, args)
+
+        assert outcome.exit_code == 0
+        # One store per file, at every nesting level.
+        assert db_double.store_document.call_count == 3
+
+    @patch('markitect.database.DatabaseManager')
+    def test_batch_process_command(self, mock_db_manager):
+        """batch-process ingests only the files matched by the glob pattern."""
+        db_double = Mock()
+        mock_db_manager.return_value = db_double
+
+        self.create_test_files({
+            'docs/file1.md': '# Test 1',
+            'docs/file2.md': '# Test 2',
+            'src/file3.md': '# Test 3'
+        })
+
+        # The pattern is limited to docs/, so src/file3.md falls outside it.
+        pattern = str(self.test_dir / "docs/*.md")
+        args = ['batch-process', pattern, '--operation', 'ingest', '--quiet']
+        outcome = self.runner.invoke(cli, args)
+
+        assert outcome.exit_code == 0
+        assert db_double.store_document.call_count == 2
+
+    @patch('markitect.database.DatabaseManager')
+    def test_recursive_command(self, mock_db_manager):
+        """recursive --depth 2 --operation status checks only files within the depth limit."""
+        # Every metadata lookup raises, as if no file were known to the database.
+        db_double = Mock()
+        db_double.get_metadata.side_effect = Exception("Not found")
+        mock_db_manager.return_value = db_double
+
+        self.create_test_files({
+            'level1/file1.md': '# Test 1',
+            'level1/level2/file2.md': '# Test 2',
+            'level1/level2/level3/file3.md': '# Test 3'
+        })
+
+        args = [
+            'recursive', str(self.test_dir),
+            '--depth', '2',
+            '--operation', 'status',
+            '--quiet',
+        ]
+        outcome = self.runner.invoke(cli, args)
+
+        assert outcome.exit_code == 0
+        # Two lookups expected: the file at the third level is beyond depth 2.
+        assert db_double.get_metadata.call_count == 2
+
+    def test_error_handling_stop(self):
+        """ingest-dir with --error-handling stop exits cleanly on an empty directory."""
+        # The scratch directory is left empty, so there is nothing to process.
+        args = ['ingest-dir', str(self.test_dir), '--error-handling', 'stop', '--quiet']
+        outcome = self.runner.invoke(cli, args)
+
+        assert outcome.exit_code == 0
+
+    def test_invalid_directory(self):
+        """ingest-dir rejects a directory argument that does not exist."""
+        args = ['ingest-dir', '/nonexistent/directory', '--quiet']
+        outcome = self.runner.invoke(cli, args)
+
+        # Exit code 2 is Click's argument validation error.
+        assert outcome.exit_code == 2
+
+    @patch('markitect.database.DatabaseManager')
+    def test_custom_pattern(self, mock_db_manager):
+        """ingest-dir --pattern restricts ingestion to files matching the pattern."""
+        db_double = Mock()
+        mock_db_manager.return_value = db_double
+
+        # Three different extensions; only one matches '*.markdown'.
+        self.create_test_files({
+            'file1.md': '# Test 1',
+            'file2.markdown': '# Test 2',
+            'file3.txt': 'Not markdown'
+        })
+
+        args = ['ingest-dir', str(self.test_dir), '--pattern', '*.markdown', '--quiet']
+        outcome = self.runner.invoke(cli, args)
+
+        assert outcome.exit_code == 0
+        assert db_double.store_document.call_count == 1
+
+
+class TestErrorHandling:
+    """Failure scenarios for BatchProcessor.find_markdown_files."""
+
+    def setup_method(self):
+        """Create a fresh scratch directory before every test."""
+        self.temp_dir = tempfile.mkdtemp()
+        self.test_dir = Path(self.temp_dir)
+
+    def teardown_method(self):
+        """Delete the scratch directory after every test."""
+        shutil.rmtree(self.temp_dir)
+
+    def test_permission_error_handling(self):
+        """A PermissionError while listing a directory yields an empty result."""
+        batch = BatchProcessor(show_progress=False)
+
+        # Make Path.iterdir raise PermissionError for the duration of the lookup.
+        denied = PermissionError("Permission denied")
+        with patch('pathlib.Path.iterdir', side_effect=denied):
+            found = batch.find_markdown_files(self.test_dir)
+            # The search is expected to swallow the error and return no files.
+            assert found == []
+
+    def test_nonexistent_directory(self):
+        """Searching a path that does not exist raises FileNotFoundError."""
+        batch = BatchProcessor()
+        missing = Path("/nonexistent/directory")
+
+        with pytest.raises(FileNotFoundError):
+            batch.find_markdown_files(missing)
+
+    def test_file_as_directory(self):
+        """Passing a regular file where a directory is expected raises NotADirectoryError."""
+        regular_file = self.test_dir / "test.md"
+        regular_file.write_text("# Test")
+
+        batch = BatchProcessor()
+
+        with pytest.raises(NotADirectoryError):
+            batch.find_markdown_files(regular_file)
+
+
+class TestEdgeCases:
+    """Boundary conditions for file discovery and batch processing."""
+
+    def setup_method(self):
+        """Create a fresh scratch directory before every test."""
+        self.temp_dir = tempfile.mkdtemp()
+        self.test_dir = Path(self.temp_dir)
+
+    def teardown_method(self):
+        """Delete the scratch directory after every test."""
+        shutil.rmtree(self.temp_dir)
+
+    def test_empty_directory(self):
+        """Searching a directory with no entries returns an empty list."""
+        found = BatchProcessor().find_markdown_files(self.test_dir)
+        assert found == []
+
+    def test_hidden_directories(self):
+        """Markdown files inside dot-prefixed directories are not discovered."""
+        dot_dir = self.test_dir / ".hidden"
+        dot_dir.mkdir()
+        (dot_dir / "test.md").write_text("# Hidden")
+
+        batch = BatchProcessor()
+        found = batch.find_markdown_files(self.test_dir, recursive=True)
+
+        # The only markdown file lives under .hidden, so nothing should be found.
+        assert len(found) == 0
+
+    def test_depth_zero(self):
+        """With depth=0 a recursive search stays in the starting directory."""
+        # One file at the top level and one in a subdirectory.
+        (self.test_dir / "file1.md").write_text("# Test 1")
+        child_dir = self.test_dir / "subdir"
+        child_dir.mkdir()
+        (child_dir / "file2.md").write_text("# Test 2")
+
+        batch = BatchProcessor()
+        found = batch.find_markdown_files(self.test_dir, recursive=True, depth=0)
+
+        # Only file1.md qualifies; subdir/file2.md sits one level too deep.
+        assert len(found) == 1
+        assert found[0].name == "file1.md"
+
+    def test_very_deep_structure(self):
+        """A depth limit of 5 cuts off a ten-level directory chain."""
+        # One file directly in the starting directory ...
+        (self.test_dir / "file_root.md").write_text("# Root Test")
+
+        # ... plus one file in each of ten successively nested directories.
+        nested = self.test_dir
+        for level in range(10):
+            nested = nested / f"level{level}"
+            nested.mkdir()
+            (nested / f"file{level}.md").write_text(f"# Test {level}")
+
+        batch = BatchProcessor()
+        found = batch.find_markdown_files(self.test_dir, recursive=True, depth=5)
+
+        # Root file (depth 0) + level0..level4 (depths 1-5) = 6 files.
+        assert len(found) == 6
+
+    def test_glob_with_no_matches(self):
+        """A glob pattern that matches nothing yields an empty list."""
+        pattern = str(self.test_dir / "*.nonexistent")
+        found = BatchProcessor().find_files_by_glob(pattern)
+        assert found == []
+
+    def test_file_deleted_during_processing(self):
+        """A file removed after the file list was built is counted as skipped."""
+        doomed = self.test_dir / "test.md"
+        doomed.write_text("# Test")
+
+        def mock_processor(file_path):
+            # What is under test is the file-existence handling in process_files,
+            # not this callable; it would simply report success if invoked.
+            return ProcessingResult(file_path, True, "Processed")
+
+        batch = BatchProcessor(show_progress=False)
+        pending = [doomed]
+
+        # Remove the file after it was listed but before the batch runs.
+        doomed.unlink()
+
+        outcome = batch.process_files(pending, mock_processor, "Testing")
+
+        # The vanished file is expected to be skipped rather than processed.
+        assert outcome.skipped == 1
+        assert outcome.processed == 0