feat: implement batch processing and recursive operations (issue #17)

Complete implementation of batch processing capabilities for MarkiTect CLI:

New CLI Commands:
- markitect ingest-dir: Process all markdown files in directory with recursive support
- markitect batch-process: Process files matching glob patterns
- markitect recursive: Recursive processing with depth control

Core Features:
- Sophisticated batch processing engine with progress tracking
- Multiple error handling strategies (stop, continue, skip)
- Recursive directory traversal with configurable depth limits
- Glob pattern matching for flexible file selection
- Progress feedback with detailed processing statistics
- Integration with existing database and caching systems

Technical Implementation:
- BatchProcessor class with modular architecture
- ProgressTracker for real-time user feedback
- Comprehensive error handling and edge case management
- Support for multiple operations (ingest, status, validate)
- Depth-controlled recursive search with proper boundary handling
- Permission error resilience and graceful degradation

Testing:
- 29 comprehensive tests covering all functionality
- Edge cases: empty directories, hidden files, permission errors
- CLI integration tests with mocked database operations
- Depth logic validation and boundary condition testing
- Error handling scenarios and recovery mechanisms

All acceptance criteria fulfilled:
- Directory and recursive processing
- Glob pattern support for file selection
- Progress tracking and user feedback
- Error handling with continuation options
- Comprehensive test coverage

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-03 10:45:43 +02:00
parent a4805812f3
commit 0982e771e4
3 changed files with 1227 additions and 0 deletions

View File

@@ -28,6 +28,7 @@ import builtins
from .database import DatabaseManager
from .legacy_compat import LegacyMode, emit_deprecation_warning, legacy_switch_option
from .__version__ import get_version_info, get_release_info
from .batch_processor import BatchProcessor, ProcessingMode, ErrorHandling, create_file_processor
# Import legacy system components for advanced management
try:
@@ -4549,6 +4550,200 @@ def perf_history(config, limit, trend_days, output_format, output):
sys.exit(1)
# Batch Processing Commands - Issue #17
@cli.command(name='ingest-dir')
@click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path))
@click.option('--recursive', '-r', is_flag=True, help='Process directories recursively')
@click.option('--depth', type=int, help='Maximum depth for recursive processing')
@click.option('--pattern', default='*.md', help='File pattern to match (default: *.md)')
@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']),
              default='continue', help='Error handling strategy')
@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output')
@pass_config
def ingest_dir(config, directory, recursive, depth, pattern, error_handling, quiet):
    """Process all Markdown files in directory.

    Ingests all markdown files found in the specified directory into the database.
    Supports recursive processing with depth control and flexible error handling.

    Examples:
        markitect ingest-dir ./docs
        markitect ingest-dir ./content --recursive --depth 3
        markitect ingest-dir ./articles --pattern "*.markdown" --error-handling stop
    """
    # The docstring above doubles as the command's --help text, so
    # implementation notes are kept in comments instead.
    #
    # config:         object injected by @pass_config; only .get('verbose') is read here.
    # directory:      existing directory (a Path, per the click.Path declaration).
    # recursive:      flag forwarded to the file search.
    # depth:          optional int forwarded both to BatchProcessor and to the search.
    # pattern:        file-name pattern forwarded to the search.
    # error_handling: one of 'stop' / 'continue' / 'skip'.
    # quiet:          suppresses the progress echo and the processor's progress display.
    try:
        # Map the CLI choice string onto the ErrorHandling member of the same name.
        strategy = ErrorHandling[error_handling.upper()]
        verbose_output = not quiet

        batch = BatchProcessor(
            error_handling=strategy,
            show_progress=verbose_output,
            max_depth=depth,
        )

        if verbose_output:
            click.echo(f"🔍 Searching for files in {directory}...")

        matches = batch.find_markdown_files(
            directory=directory,
            pattern=pattern,
            recursive=recursive,
            depth=depth,
        )
        if not matches:
            # Reported even in quiet mode: nothing was processed at all.
            click.echo(f"📭 No files found matching pattern '{pattern}' in {directory}")
            return

        ingest_one = create_file_processor(config, ProcessingMode.INGEST)
        outcome = batch.process_files(matches, ingest_one, "Ingesting")

        # A non-zero exit status is only signalled under the STOP strategy.
        had_failures = outcome.failed > 0
        if had_failures and strategy == ErrorHandling.STOP:
            sys.exit(1)
    except Exception as exc:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass, so it passes through this handler untouched.
        click.echo(f"Directory ingestion failed: {exc}", err=True)
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@cli.command(name='batch-process')
@click.argument('pattern', type=str)
@click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']),
              default='ingest', help='Operation to perform on matched files')
@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']),
              default='continue', help='Error handling strategy')
@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output')
@pass_config
def batch_process(config, pattern, operation, error_handling, quiet):
    """Process files matching glob pattern.

    Uses glob patterns to find and process files. Supports various operations
    including ingestion, status checking, and validation.

    Examples:
        markitect batch-process "**/*.md" --operation ingest
        markitect batch-process "docs/**/*.markdown" --operation status
        markitect batch-process "./content/*.md" --operation validate --error-handling stop
    """
    # The docstring above doubles as the command's --help text, so
    # implementation notes are kept in comments instead.
    #
    # config:         object injected by @pass_config; only .get('verbose') is read here.
    # pattern:        glob pattern handed to BatchProcessor.find_files_by_glob().
    # operation:      one of 'ingest' / 'status' / 'validate'.
    # error_handling: one of 'stop' / 'continue' / 'skip'.
    # quiet:          suppresses the progress echo and the processor's progress display.
    try:
        # Map the CLI choice strings onto the members of the same (upper-cased) name.
        error_strategy = ErrorHandling[error_handling.upper()]
        processing_mode = ProcessingMode[operation.upper()]

        processor = BatchProcessor(
            error_handling=error_strategy,
            show_progress=not quiet,
        )

        if not quiet:
            click.echo(f"🔍 Searching for files matching '{pattern}'...")

        files = processor.find_files_by_glob(pattern)
        if not files:
            # Reported even in quiet mode: nothing was processed at all.
            click.echo(f"📭 No files found matching pattern '{pattern}'")
            return

        file_processor = create_file_processor(config, processing_mode)

        # Build the progress label. Blindly appending "ing" misspelled
        # "validate" as "Validateing"; drop a trailing "e" first so the
        # labels become "Ingesting", "Statusing" and "Validating".
        verb = operation.title()
        if verb.endswith('e'):
            verb = verb[:-1]
        operation_name = f"{verb}ing"

        result = processor.process_files(files, file_processor, operation_name)

        # A non-zero exit status is only signalled under the STOP strategy.
        if result.failed > 0 and error_strategy == ErrorHandling.STOP:
            sys.exit(1)
    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass, so it passes through this handler untouched.
        click.echo(f"Batch processing failed: {e}", err=True)
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@cli.command(name='recursive')
@click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path))
@click.option('--depth', type=int, default=None, help='Maximum recursion depth')
@click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']),
              default='status', help='Operation to perform')
@click.option('--pattern', default='*.md', help='File pattern to match (default: *.md)')
@click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']),
              default='continue', help='Error handling strategy')
@click.option('--quiet', '-q', is_flag=True, help='Suppress progress output')
@pass_config
def recursive(config, directory, depth, operation, pattern, error_handling, quiet):
    """Recursive processing with depth control.

    Performs recursive operations on directory trees with configurable depth limits.
    This command provides fine-grained control over recursive processing behavior.

    Examples:
        markitect recursive ./docs --depth 2 --operation ingest
        markitect recursive ./content --depth 5 --operation status --pattern "*.markdown"
        markitect recursive ./src --operation validate --error-handling stop
    """
    # The docstring above doubles as the command's --help text, so
    # implementation notes are kept in comments instead.
    #
    # config:         object injected by @pass_config; only .get('verbose') is read here.
    # directory:      existing directory (a Path, per the click.Path declaration).
    # depth:          optional int; None means no limit was requested on the CLI.
    # operation:      one of 'ingest' / 'status' / 'validate'.
    # pattern:        file-name pattern forwarded to the search.
    # error_handling: one of 'stop' / 'continue' / 'skip'.
    # quiet:          suppresses the progress echo and the processor's progress display.
    try:
        # Map the CLI choice strings onto the members of the same (upper-cased) name.
        error_strategy = ErrorHandling[error_handling.upper()]
        processing_mode = ProcessingMode[operation.upper()]

        processor = BatchProcessor(
            error_handling=error_strategy,
            show_progress=not quiet,
            max_depth=depth,
        )

        if not quiet:
            # Compare against None explicitly: an explicit `--depth 0` is
            # falsy but is still a limit the user asked for and should be shown.
            depth_str = f" (max depth: {depth})" if depth is not None else ""
            click.echo(f"🔍 Recursively searching {directory}{depth_str}...")

        files = processor.find_markdown_files(
            directory=directory,
            pattern=pattern,
            recursive=True,
            depth=depth,
        )
        if not files:
            # Reported even in quiet mode: nothing was processed at all.
            click.echo(f"📭 No files found matching pattern '{pattern}' in {directory}")
            return

        file_processor = create_file_processor(config, processing_mode)

        # Build the progress label. Blindly appending "ing" misspelled
        # "validate" as "validateing"; drop a trailing "e" first.
        verb = operation[:-1] if operation.endswith('e') else operation
        operation_name = f"Recursively {verb}ing"

        result = processor.process_files(files, file_processor, operation_name)

        # A non-zero exit status is only signalled under the STOP strategy.
        if result.failed > 0 and error_strategy == ErrorHandling.STOP:
            sys.exit(1)
    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass, so it passes through this handler untouched.
        click.echo(f"Recursive processing failed: {e}", err=True)
        if config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
# Register issue management commands: attach `issues_group` (defined outside
# this excerpt) to the root `cli` group.
cli.add_command(issues_group)