""" CLI Entry Point and Basic Commands - Issue #12 This module provides the command-line interface for MarkiTect, allowing users to interact with core functionality through terminal commands. Commands: - ingest: Process and store a markdown file - status: Show processing status and metadata for a file - list: List all stored files and their status Integration with existing components: - Uses DatabaseManager for file storage and retrieval - Uses DocumentManager for high-performance document processing - Maintains performance caching architecture """ import click import os import sys import json import yaml from pathlib import Path from typing import Optional, List, Tuple from dataclasses import dataclass from tabulate import tabulate import builtins from .database import DatabaseManager from .legacy_compat import LegacyMode, emit_deprecation_warning, legacy_switch_option from .__version__ import get_version_info, get_release_info from .batch_processor import BatchProcessor, ProcessingMode, ErrorHandling, create_file_processor from .config_manager import ConfigurationManager # Import cost tracking commands try: from .finance.cli import cost_commands COST_TRACKING_AVAILABLE = True except ImportError: COST_TRACKING_AVAILABLE = False # Import profile management commands try: from .profile.commands import profile_commands PROFILE_MANAGEMENT_AVAILABLE = True except ImportError: PROFILE_MANAGEMENT_AVAILABLE = False def get_database_path(config): """Get database path from config.""" return config.get('database_path', os.path.expanduser('~/.markitect/markitect.db')) # Import legacy system components for advanced management try: from .legacy import ( LegacyRegistry, LegacyAgent, LegacyStatus, AgentConfig ) LEGACY_SYSTEM_AVAILABLE = True except ImportError: LEGACY_SYSTEM_AVAILABLE = False def detect_execution_mode(): """ Detect whether we're running in interactive or automation mode. Returns: str: 'interactive' or 'automation' Detection logic: - Environment variable MARKITECT_MODE overrides detection - Interactive: TTY present, not in CI, not in pipe - Automation: CI environment, pipe/redirect, or explicit setting """ # Explicit mode override mode = os.environ.get('MARKITECT_MODE', '').lower() if mode in ['interactive', 'automation']: return mode # Detect CI environments ci_indicators = [ 'CI', 'CONTINUOUS_INTEGRATION', 'GITHUB_ACTIONS', 'GITLAB_CI', 'JENKINS_URL', 'BUILD_NUMBER' ] if any(os.environ.get(var) for var in ci_indicators): return 'automation' # Check if output is being piped or redirected if not sys.stdout.isatty(): return 'automation' # Check if input is being piped if not sys.stdin.isatty(): return 'automation' # Default to interactive for terminal usage return 'interactive' def should_use_associated_files(): """Determine if commands should use associated files behavior.""" return detect_execution_mode() == 'interactive' # DocumentManager removed - using CleanDocumentManager directly in commands from .serializer import ASTSerializer from .cache_service import CacheDirectoryService from .ast_service import ASTService from .schema_generator import SchemaGenerator from .schema_validator import SchemaValidator from .exceptions import FileNotFoundError, InvalidDepthError, SchemaValidationError, InvalidSchemaError # Global options for CLI configuration pass_config = click.make_pass_decorator(dict, ensure=True) def get_default_format(available_formats=['table', 'json', 'yaml', 'simple'], fallback='simple'): """ Get the default output format from environment variable or fallback. Supports MARKITECT_DEFAULT_FORMAT environment variable to customize the default output format across all commands. Args: available_formats: List of formats supported by the command fallback: Default format to use if env var not set or invalid Returns: Default format string """ env_format = os.environ.get('MARKITECT_DEFAULT_FORMAT', '').lower() if env_format and env_format in available_formats: return env_format # If simple is available and no env override, use simple if 'simple' in available_formats: return 'simple' # Otherwise use the provided fallback return fallback def format_output(data, output_format): """ Format data according to specified output format. Args: data: Data to format output_format: Format type ('table', 'json', 'yaml') Returns: Formatted string output """ if output_format == 'json': return json.dumps(data, indent=2, default=str) elif output_format == 'yaml': return yaml.dump(data, default_flow_style=False, allow_unicode=True) elif output_format == 'simple': # Simple format - just basic text output if isinstance(data, builtins.list): return '\n'.join(str(item) for item in data) elif isinstance(data, builtins.dict): return '\n'.join(f"{key}: {value}" for key, value in data.items()) else: return str(data) elif output_format == 'table': try: # Check if it's a list type if isinstance(data, (builtins.list, builtins.tuple)): if data and isinstance(data[0], builtins.dict): # List of dictionaries - format as table headers = sorted(data[0].keys()) rows = [] for item in data: row = [] for header in headers: row.append(item.get(header, '')) rows.append(row) return tabulate(rows, headers=headers, tablefmt='grid') else: # List of simple values return tabulate([[item] for item in data], headers=['Value'], tablefmt='grid') elif isinstance(data, builtins.dict): # Single dictionary - format as key-value table rows = [[key, value] for key, value in data.items()] return tabulate(rows, headers=['Key', 'Value'], tablefmt='grid') else: # Fallback to string representation return str(data) except Exception as e: # Fallback to string if table formatting fails return f"Table formatting error: {e}\nData: {str(data)}" else: # Default to table format return format_output(data, 'table') def print_version(ctx, param, value): """Callback to print version and exit.""" if not value or ctx.resilient_parsing: return version_info = get_version_info() click.echo(version_info['full_version']) ctx.exit() @click.group() @click.option('--verbose', '-v', is_flag=True, help='Enable verbose output') @click.option('--config', 'config_file', type=click.Path(exists=True), help='Configuration file path') @click.option('--database', type=click.Path(), help='Database file path') @click.option('--version', is_flag=True, expose_value=False, is_eager=True, callback=print_version, help='Show version and exit') @pass_config def cli(config, verbose, database, config_file): """ MarkiTect - Advanced Markdown engine for structured content. Process markdown files with front matter support, AST caching, and relational metadata queries. Examples: markitect ingest document.md # Process a markdown file markitect status document.md # Check file status markitect list # List all stored files """ # Store configuration in context config['verbose'] = verbose config['config_file'] = config_file # Determine database path if database: config['database_path'] = database else: # Default database location config['database_path'] = os.path.expanduser('~/.markitect/markitect.db') # Initialize database manager and ensure database exists try: db_manager = DatabaseManager(config['database_path']) db_manager.initialize_database() config['db_manager'] = db_manager if verbose: click.echo(f"Using database: {config['database_path']}", err=True) except Exception as e: click.echo(f"Error initializing database: {e}", err=True) sys.exit(1) # Issue management commands removed - use dedicated 'issue' CLI or 'tddai' CLI instead # Version and release information commands @cli.command() @click.option('--short', is_flag=True, help='Show only version number') def version(short): """Show MarkiTect version information.""" version_info = get_version_info() if short: click.echo(version_info['full_version']) else: click.echo("MarkiTect Version Information") click.echo("============================") click.echo(f"Version: {version_info['full_version']}") click.echo(f"Short Version: {version_info['short_version']}") if version_info.get('is_git_repo'): click.echo(f"Git Commit: {version_info.get('git_commit', 'N/A')}") click.echo(f"Git Branch: {version_info.get('git_branch', 'N/A')}") if version_info.get('git_tag'): click.echo(f"Git Tag: {version_info['git_tag']}") click.echo(f"Development Build: {'Yes' if version_info.get('is_dev') else 'No'}") else: click.echo("Git Repository: Not available") @cli.command() @click.option('--format', 'output_format', default='text', type=click.Choice(['text', 'json', 'yaml']), help='Output format (text, json, yaml)') def release(output_format): """Show MarkiTect release information.""" release_info = get_release_info() if output_format == 'json': import json click.echo(json.dumps(release_info, indent=2)) elif output_format == 'yaml': import yaml click.echo(yaml.dump(release_info, default_flow_style=False)) else: # Text format click.echo("MarkiTect Release Information") click.echo("============================") click.echo(f"Version: {release_info['full_version']}") click.echo(f"Release Type: {release_info['release_type']}") click.echo(f"Build From: {release_info['build_from']}") click.echo(f"Commit: {release_info['commit']}") click.echo(f"Clean Build: {'Yes' if release_info['clean_build'] else 'No'}") if release_info['is_git_repo']: click.echo(f"Git Repository: Available") if release_info['git_tag']: click.echo(f"Tagged Release: {release_info['git_tag']}") else: click.echo("Git Repository: Not available") def _show_core_system_stats(config, format): """Display core MarkiTect system statistics and health information.""" try: # Collect core system statistics stats = {} # Database Statistics try: db_manager = config.get('db_manager') if db_manager: # Get database file info db_path = config.get('database_path', 'Unknown') db_exists = Path(db_path).exists() if db_path != 'Unknown' else False if db_exists: db_size = Path(db_path).stat().st_size db_size_human = format_file_size(db_size) else: db_size = 0 db_size_human = '0 B' # Get file counts from database using proper API try: # Use execute_query method for database queries total_files_result = db_manager.execute_query("SELECT COUNT(*) as count FROM markdown_files") total_files = total_files_result[0]['count'] if total_files_result else 0 # Recent files (last 7 days) recent_files_result = db_manager.execute_query(""" SELECT COUNT(*) as count FROM markdown_files WHERE created_at >= datetime('now', '-7 days') """) recent_files = recent_files_result[0]['count'] if recent_files_result else 0 # Schema count (table might not exist) try: schema_count_result = db_manager.execute_query("SELECT COUNT(*) as count FROM schema_files") schema_count = schema_count_result[0]['count'] if schema_count_result else 0 except Exception: # schema_files table doesn't exist - this is okay schema_count = 0 stats['database'] = { 'path': db_path, 'exists': db_exists, 'size_bytes': db_size, 'size_human': db_size_human, 'total_markdown_files': total_files, 'recent_files_7_days': recent_files, 'schema_files': schema_count, 'available': True } except Exception as db_error: stats['database'] = { 'path': db_path, 'exists': db_exists, 'size_bytes': db_size, 'size_human': db_size_human, 'available': False, 'error': str(db_error) } else: stats['database'] = { 'available': False, 'message': 'Database manager not initialized' } except Exception as e: stats['database'] = { 'available': False, 'error': str(e) } # Cache Statistics try: from .cache_service import CacheDirectoryService cache_service = CacheDirectoryService() cache_stats = cache_service.get_cache_stats() stats['cache'] = { 'directory': cache_stats.get('cache_directory', 'Unknown'), 'total_files': cache_stats.get('total_files', 0), 'size_bytes': cache_stats.get('cache_size_bytes', 0), 'size_human': cache_stats.get('cache_size_human', '0 B'), 'available': True } except Exception as e: stats['cache'] = { 'available': False, 'error': str(e) } # System Information stats['system'] = { 'working_directory': os.getcwd(), 'python_version': sys.version.split()[0], 'config_file': config.get('config_file', 'None specified'), 'verbose_mode': config.get('verbose', False), 'execution_mode': detect_execution_mode() } # Subsystem Health Check subsystems = { 'database': stats['database']['available'], 'cache': stats['cache']['available'], 'ast_service': True, # Available if we got here } healthy_count = sum(subsystems.values()) total_count = len(subsystems) stats['health'] = { 'subsystems': subsystems, 'healthy_subsystems': healthy_count, 'total_subsystems': total_count, 'health_percentage': round((healthy_count / total_count) * 100, 1), 'overall_status': 'healthy' if healthy_count == total_count else 'degraded' } # Format output if format == 'json': click.echo(json.dumps(stats, indent=2)) elif format == 'yaml': click.echo(yaml.dump(stats, default_flow_style=False)) elif format == 'simple': # Simple key-value output if stats['database']['available']: db = stats['database'] click.echo(f"database_files: {db['total_markdown_files']}") click.echo(f"database_size: {db['size_human']}") click.echo(f"database_recent_files: {db['recent_files_7_days']}") else: click.echo("database_available: False") if stats['cache']['available']: cache = stats['cache'] click.echo(f"cache_files: {cache['total_files']}") click.echo(f"cache_size: {cache['size_human']}") else: click.echo("cache_available: False") health = stats['health'] click.echo(f"system_health: {health['health_percentage']}%") click.echo(f"overall_status: {health['overall_status']}") else: # table format (default) click.echo("šŸ“Š MarkiTect Core System Statistics") click.echo("=" * 50) # System Health Overview health = stats['health'] health_icon = "āœ…" if health['overall_status'] == 'healthy' else "āš ļø" click.echo(f"\nšŸ„ System Health: {health_icon} {health['overall_status'].title()} ({health['health_percentage']}%)") click.echo(f" Healthy Subsystems: {health['healthy_subsystems']}/{health['total_subsystems']}") # Database section click.echo("\nšŸ—„ļø Database:") if stats['database']['available']: db = stats['database'] click.echo(f" Path: {db['path']}") click.echo(f" Status: āœ… Available ({db['size_human']})") click.echo(f" Markdown Files: {db['total_markdown_files']}") click.echo(f" Schema Files: {db['schema_files']}") click.echo(f" Recent Activity (7 days): {db['recent_files_7_days']} files") else: click.echo(" Status: āŒ Unavailable") if 'error' in stats['database']: click.echo(f" Error: {stats['database']['error']}") elif 'message' in stats['database']: click.echo(f" Note: {stats['database']['message']}") # Cache section click.echo("\nšŸ—ƒļø Cache:") if stats['cache']['available']: cache = stats['cache'] click.echo(f" Directory: {cache['directory']}") click.echo(f" Status: āœ… Available ({cache['size_human']})") click.echo(f" Cached Files: {cache['total_files']}") else: click.echo(" Status: āŒ Unavailable") if 'error' in stats['cache']: click.echo(f" Error: {stats['cache']['error']}") # System section click.echo("\nšŸ–„ļø System Information:") sys_info = stats['system'] click.echo(f" Working Directory: {sys_info['working_directory']}") click.echo(f" Python Version: {sys_info['python_version']}") click.echo(f" Execution Mode: {sys_info['execution_mode']}") click.echo(f" Config File: {sys_info['config_file']}") click.echo(f" Verbose Mode: {sys_info['verbose_mode']}") except Exception as e: click.echo(f"Error gathering core system statistics: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) @cli.command('stats') @click.argument('file_path', type=str, required=False) @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def stats(config, file_path, format): """ Show core system statistics or file-specific information. When called with a file: Display file's processing status, metadata, and front matter content from the database. When called without a file: Show core MarkiTect system statistics including database status, processing metrics, and subsystem health. FILE_PATH: Optional path or name of the file to check Examples: markitect stats # Show core system statistics markitect stats README.md # Show file-specific status markitect stats docs/guide.md --format json """ try: # If no file provided, show core system statistics if not file_path: if config.get('verbose'): click.echo("Displaying core system statistics", err=True) _show_core_system_stats(config, format) return # File-specific status (existing behavior) if config['verbose']: click.echo(f"Checking status for: {file_path}") # Get file information from database db_manager = config['db_manager'] file_info = db_manager.get_markdown_file(file_path) if file_info: click.echo(f"File: {file_info['filename']}") click.echo(f"Status: Processed") click.echo(f"Created: {file_info['created_at']}") if file_info['front_matter']: try: front_matter = eval(file_info['front_matter']) # Safe for our controlled data if front_matter: click.echo("Front Matter:") for key, value in front_matter.items(): click.echo(f" {key}: {value}") except (ValueError, TypeError, SyntaxError): click.echo("Front Matter: (parsing error)") elif file_info['front_matter'] is None: pass # No front matter to display if config['verbose']: content_preview = file_info['content'][:200] + "..." if len(file_info['content']) > 200 else file_info['content'] click.echo(f"Content preview: {content_preview}") else: click.echo(f"File not found in database: {file_path}") click.echo("Use 'markitect ingest' to process the file first.") sys.exit(1) except Exception as e: click.echo(f"Error checking file status: {e}", err=True) if config['verbose']: import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command() @click.argument('file_path', type=str) @click.option('--add-section', type=str, help='Add section with title') @click.option('--section-content', type=str, default='', help='Content for new section') @click.option('--section-level', type=int, default=2, help='Heading level for new section (1-6)') @click.option('--update-front-matter', type=str, help='Update front matter (format: key:value)') @click.option('--output', '-o', type=click.Path(), help='Output file path (default: overwrite original in cache)') @pass_config def modify(config, file_path, add_section, section_content, section_level, update_front_matter, output): """ Modify the content of a processed markdown file. Loads the file from cache, applies modifications, and updates the cache or outputs to a new file. Supports adding sections and updating front matter. FILE_PATH: Name of the file to modify Examples: markitect modify README.md --add-section "New Section" --section-content "New content" markitect modify doc.md --update-front-matter "status:updated" markitect modify doc.md --add-section "Notes" --output modified_doc.md """ try: if config['verbose']: click.echo(f"Modifying file: {file_path}") db_manager = config['db_manager'] # Get file information from database file_info = db_manager.get_markdown_file(file_path) if not file_info: click.echo(f"File not found in database: {file_path}", err=True) click.echo("Use 'markitect ingest' to process the file first.", err=True) sys.exit(1) # Load AST from cache cache_filename = f"{file_path}.ast.json" cache_path = Path('.ast_cache') / cache_filename if not cache_path.exists(): click.echo(f"AST cache not found: {cache_path}", err=True) click.echo("Try re-ingesting the file to regenerate cache.", err=True) sys.exit(1) # Read AST from cache with open(cache_path, 'r', encoding='utf-8') as f: ast = json.load(f) # Parse front matter from database front_matter = {} if file_info.get('front_matter'): try: front_matter = eval(file_info['front_matter']) or {} except (ValueError, TypeError, SyntaxError): if config['verbose']: click.echo("Warning: Could not parse existing front matter", err=True) # Prepare modifications modifications = {} changes_made = [] # Handle add-section modification if add_section: modifications['add_section'] = { 'title': add_section, 'content': section_content, 'level': section_level } changes_made.append(f"Added section: {add_section}") # Handle front matter updates if update_front_matter: try: if ':' in update_front_matter: key, value = update_front_matter.split(':', 1) key = key.strip() value = value.strip() # Try to parse value as appropriate type if value.lower() in ['true', 'false']: value = value.lower() == 'true' elif value.isdigit(): value = int(value) elif value.replace('.', '').isdigit(): value = float(value) front_matter[key] = value changes_made.append(f"Updated front matter: {key} = {value}") else: click.echo("Invalid front matter format. Use 'key:value'", err=True) sys.exit(1) except ValueError as e: click.echo(f"Error parsing front matter update: {e}", err=True) sys.exit(1) if not changes_made: click.echo("No modifications specified. Use --add-section or --update-front-matter", err=True) sys.exit(1) # Apply modifications to AST serializer = ASTSerializer() if modifications: ast = serializer.modify_ast_content(ast, modifications) # Serialize back to markdown markdown_content = serializer.serialize_to_markdown(ast, front_matter) # Handle output if output: # Write to specified output file output_path = Path(output) output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, 'w', encoding='utf-8') as f: f.write(markdown_content) click.echo(f"āœ“ Modified file written to: {output_path}") else: # Update the cache and database with modifications with open(cache_path, 'w', encoding='utf-8') as f: json.dump(ast, f, indent=2, ensure_ascii=False) # Update database with new front matter if front_matter: # Note: This would require extending DatabaseManager to update front matter # For now, we'll just note the modification if config['verbose']: click.echo("Note: Database front matter update not implemented yet", err=True) click.echo(f"āœ“ Modified file updated in cache: {file_path}") # Show changes made if config['verbose']: click.echo("Changes applied:", err=True) for change in changes_made: click.echo(f" - {change}", err=True) except Exception as e: click.echo(f"Error modifying file: {e}", err=True) if config['verbose']: import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('db-query') @click.argument('sql', type=str) @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def db_query(config, sql, format): """ Execute SQL query against the database. Execute read-only SQL queries to explore and analyze document metadata. Only SELECT and WITH statements are allowed for security. SQL: SQL query to execute (SELECT statements only) Examples: markitect db-query "SELECT filename, created_at FROM markdown_files" markitect db-query "SELECT COUNT(*) as total FROM markdown_files" --format json markitect db-query "SELECT * FROM markdown_files WHERE filename LIKE '%.md'" --format yaml """ try: if config['verbose']: click.echo(f"Executing query: {sql}", err=True) db_manager = config['db_manager'] # Execute the query results = db_manager.execute_query(sql) if not results: if format == 'json': click.echo('[]') elif format == 'yaml': click.echo('[]') else: click.echo("No results found.") return # Format and display results formatted_output = format_output(results, format) click.echo(formatted_output) if config['verbose']: click.echo(f"Query returned {len(results)} result(s)", err=True) except ValueError as e: click.echo(f"Query error: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"Database error: {e}", err=True) if config['verbose']: import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('db-schema') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def db_schema(config, format): """ Show database schema and table structure. Display the structure of all tables in the database, including column names, types, and constraints. Examples: markitect db-schema markitect db-schema --format json markitect db-schema --format yaml """ try: if config['verbose']: click.echo("Retrieving database schema...", err=True) db_manager = config['db_manager'] # Get schema information schema_info = db_manager.get_schema() if not schema_info: click.echo("No tables found in database.") return # Format and display schema formatted_output = format_output(schema_info, format) click.echo(formatted_output) if config['verbose']: table_count = len(schema_info) click.echo(f"Schema contains {table_count} table(s)", err=True) except Exception as e: click.echo(f"Schema error: {e}", err=True) if config['verbose']: import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command() @click.argument('file_path', type=str) @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def metadata(config, file_path, format): """ Display file metadata and front matter. DEPRECATED: Use 'db-data' instead. This command will be removed in a future version. Show detailed information about a specific file including its front matter, database metadata, and processing information. FILE_PATH: Name of the file to display metadata for Examples: markitect metadata README.md (deprecated - use: markitect db-data README.md) markitect metadata docs/guide.md --format json (deprecated - use: markitect db-data docs/guide.md --format json) markitect metadata config.md --format yaml (deprecated - use: markitect db-data config.md --format yaml) """ # Show deprecation warning (unless in legacy mode) if not LegacyMode.should_suppress_warnings(): emit_deprecation_warning( "The 'metadata' command is deprecated. Please use 'db-data' instead. " "This command will be removed in a future version." ) try: if config['verbose']: click.echo(f"Retrieving metadata for: {file_path}", err=True) db_manager = config['db_manager'] # Get file information from database file_info = db_manager.get_markdown_file(file_path) if not file_info: click.echo(f"File not found in database: {file_path}", err=True) click.echo("Use 'markitect ingest' to process the file first.", err=True) sys.exit(1) # Parse front matter for better display if file_info.get('front_matter'): try: if isinstance(file_info['front_matter'], str): file_info['front_matter'] = eval(file_info['front_matter']) except (ValueError, TypeError, SyntaxError): if config['verbose']: click.echo("Warning: Could not parse front matter", err=True) # Format and display metadata formatted_output = format_output(file_info, format) click.echo(formatted_output) if config['verbose']: content_length = len(file_info.get('content', '')) click.echo(f"Content length: {content_length} characters", err=True) except Exception as e: click.echo(f"Metadata error: {e}", err=True) if config['verbose']: import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('cache-stats') @pass_config def cache_stats(config): """ Display cache statistics and effectiveness. Shows information about AST cache including directory path, total files cached, cache size, and performance metrics. Renamed from cache-info for consistency with subsystem naming convention. """ try: cache_service = CacheDirectoryService() stats = cache_service.get_cache_stats() click.echo(f"Cache Directory: {stats['directory']}") click.echo(f"Total Files: {stats['total_files']}") click.echo(f"Cache Size: {stats['size_formatted']}") except Exception as e: click.echo(f"Cache info error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('cache-clean') @pass_config def cache_clean(config): """ Clear cache and free memory. Removes all cached AST files from the cache directory to free up disk space and memory. """ try: cache_service = CacheDirectoryService() result = cache_service.clean_cache() click.echo(result['message']) if not result['success'] and result.get('errors'): for error in result['errors']: click.echo(f"Warning: {error}", err=True) if not result['success']: sys.exit(1) except Exception as e: click.echo(f"Cache clean error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('cache-invalidate') @click.argument('file_path', type=str) @pass_config def cache_invalidate(config, file_path): """ Invalidate specific file cache. Removes the cached AST for a specific markdown file, forcing it to be re-parsed on next access. Args: file_path: Path to the file whose cache should be invalidated """ try: cache_service = CacheDirectoryService() result = cache_service.invalidate_file_cache(file_path) click.echo(result['message']) if not result['success']: sys.exit(1) except Exception as e: click.echo(f"Cache invalidate error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('ast-show') @click.argument('file_path', type=click.Path(exists=False)) @click.option('--format', '-f', type=click.Choice(['tree', 'json', 'compact']), default='tree', help='Display format') @pass_config def ast_show(config, file_path, format): """ Display AST structure for file. Shows the Abstract Syntax Tree representation of a markdown file with various formatting options for analysis and debugging. FILE_PATH: Path to the markdown file to analyze Examples: markitect ast-show document.md markitect ast-show document.md --format json markitect ast-show document.md --format compact """ try: if config.get('verbose'): click.echo(f"Analyzing AST structure for: {file_path}", err=True) ast_service = ASTService() result = ast_service.display_ast(Path(file_path), format) if result['success']: if result.get('message'): if config.get('verbose'): click.echo(f"Info: {result['message']}", err=True) click.echo(result['output']) if config.get('verbose') and result.get('token_count'): click.echo(f"Total tokens: {result['token_count']}", err=True) else: click.echo(f"Error: {result['message']}", err=True) sys.exit(1) except Exception as e: click.echo(f"AST display error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('ast-query') @click.argument('file_path', type=click.Path(exists=False)) @click.argument('jsonpath', type=str) @click.option('--format', '-f', type=click.Choice(['json', 'compact']), default='json', help='Output format') @pass_config def ast_query(config, file_path, jsonpath, format): """ Query AST using JSONPath. Execute JSONPath expressions against the AST structure of a markdown file to extract specific elements or patterns. FILE_PATH: Path to the markdown file to query JSONPATH: JSONPath expression to execute Examples: markitect ast-query doc.md '$.*.type' markitect ast-query doc.md '$..tag' markitect ast-query doc.md '$[:5]' --format compact """ try: if config.get('verbose'): click.echo(f"Executing JSONPath query on: {file_path}", err=True) click.echo(f"Query: {jsonpath}", err=True) ast_service = ASTService() result = ast_service.query_ast(Path(file_path), jsonpath) if result['success']: if config.get('verbose'): click.echo(f"Query results: {result['count']} matches", err=True) if result['count'] == 0: click.echo("No matches found for query.") else: if format == 'compact': for i, match in enumerate(result['matches']): if isinstance(match, dict): token_type = match.get('type', 'unknown') content = match.get('content', match.get('tag', ''))[:30] click.echo(f"[{i}] {token_type}: {content}") else: click.echo(f"[{i}] {match}") else: import json click.echo(json.dumps(result['matches'], indent=2, ensure_ascii=False)) else: click.echo(f"Error: {result['message']}", err=True) sys.exit(1) except Exception as e: click.echo(f"AST query error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) def _show_ast_subsystem_stats(config, format): """Display AST subsystem statistics including cache and processing metrics.""" try: # Import dependencies from .ast_cache import ASTCache from .cache_service import CacheDirectoryService # Collect AST subsystem statistics stats = {} # AST Cache information try: cache_service = CacheDirectoryService() cache_stats = cache_service.get_cache_stats() stats['ast_cache'] = { 'directory': cache_stats.get('cache_directory', 'Unknown'), 'total_files': cache_stats.get('total_files', 0), 'cache_size_bytes': cache_stats.get('cache_size_bytes', 0), 'cache_size_human': cache_stats.get('cache_size_human', '0 B'), 'available': True } except Exception as e: stats['ast_cache'] = { 'available': False, 'error': str(e) } # Database statistics (files processed) try: db_manager = config.get('db_manager') if db_manager: # Get count of files in database (processed files) conn = db_manager.get_connection() cursor = conn.cursor() cursor.execute("SELECT COUNT(*) FROM markdown_files") total_files = cursor.fetchone()[0] # Get recent processing info cursor.execute(""" SELECT COUNT(*) FROM markdown_files WHERE created_at >= datetime('now', '-7 days') """) recent_files = cursor.fetchone()[0] stats['processing'] = { 'total_files_processed': total_files, 'files_processed_last_7_days': recent_files, 'database_available': True } else: stats['processing'] = { 'database_available': False, 'message': 'Database not initialized' } except Exception as e: stats['processing'] = { 'database_available': False, 'error': str(e) } # System information stats['system'] = { 'ast_service_available': True, # If we got here, it's available 'working_directory': os.getcwd(), 'python_version': sys.version.split()[0] } # Format output if format == 'json': click.echo(json.dumps(stats, indent=2)) elif format == 'yaml': click.echo(yaml.dump(stats, default_flow_style=False)) elif format == 'simple': # Simple key-value output if stats['ast_cache']['available']: cache = stats['ast_cache'] click.echo(f"ast_cache_directory: {cache['directory']}") click.echo(f"ast_cache_files: {cache['total_files']}") click.echo(f"ast_cache_size: {cache['cache_size_human']}") else: click.echo(f"ast_cache_available: False") if stats['processing']['database_available']: proc = stats['processing'] click.echo(f"total_files_processed: {proc['total_files_processed']}") click.echo(f"recent_files_processed: {proc['files_processed_last_7_days']}") else: click.echo("database_available: False") sys_info = stats['system'] click.echo(f"working_directory: {sys_info['working_directory']}") click.echo(f"python_version: {sys_info['python_version']}") else: # table format (default) click.echo("šŸ“Š AST Subsystem Statistics") click.echo("=" * 50) # AST Cache section click.echo("\nšŸ—ƒļø AST Cache:") if stats['ast_cache']['available']: cache = stats['ast_cache'] click.echo(f" Directory: {cache['directory']}") click.echo(f" Cached Files: {cache['total_files']}") click.echo(f" Cache Size: {cache['cache_size_human']}") if cache['total_files'] == 0: click.echo(" Status: āš ļø No files cached yet") else: click.echo(f" Status: āœ… Active") else: click.echo(" Status: āŒ Unavailable") if 'error' in stats['ast_cache']: click.echo(f" Error: {stats['ast_cache']['error']}") # Processing section click.echo("\nāš™ļø Processing Metrics:") if stats['processing']['database_available']: proc = stats['processing'] click.echo(f" Total Files Processed: {proc['total_files_processed']}") click.echo(f" Files Processed (Last 7 Days): {proc['files_processed_last_7_days']}") if proc['total_files_processed'] == 0: click.echo(" Status: āš ļø No files processed yet") else: click.echo(" Status: āœ… Active") else: click.echo(" Status: āŒ Database unavailable") if 'error' in stats['processing']: click.echo(f" Error: {stats['processing']['error']}") elif 'message' in stats['processing']: click.echo(f" Note: {stats['processing']['message']}") # System section click.echo("\nšŸ–„ļø System Information:") sys_info = stats['system'] click.echo(f" AST Service: āœ… Available") click.echo(f" Working Directory: {sys_info['working_directory']}") click.echo(f" Python Version: {sys_info['python_version']}") except Exception as e: click.echo(f"Error gathering AST subsystem statistics: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) @cli.command('ast-stats') @click.argument('file_path', type=click.Path(exists=False), required=False) @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default='table', help='Output format') @pass_config def ast_stats(config, file_path, format): """ Show AST statistics for files or AST subsystem information. When called with a file: Analyze markdown file structure and provide comprehensive statistics about document elements, organization, and content patterns. When called without a file: Show AST subsystem statistics including cache information, processing metrics, and system status. FILE_PATH: Optional path to the markdown file to analyze Examples: markitect ast-stats # Show AST subsystem statistics markitect ast-stats document.md # Analyze specific file markitect ast-stats document.md --format json """ try: # If no file provided, show AST subsystem statistics if not file_path: if config.get('verbose'): click.echo("Displaying AST subsystem statistics", err=True) _show_ast_subsystem_stats(config, format) return # File-specific analysis (existing behavior) if config.get('verbose'): click.echo(f"Calculating statistics for: {file_path}", err=True) ast_service = ASTService() result = ast_service.analyze_ast_statistics(Path(file_path)) if result['success']: if config.get('verbose'): click.echo(f"Analysis complete for: {Path(file_path).name}", err=True) stats = result['statistics'] if format == 'table': # Format statistics as readable table click.echo("Document Statistics:") click.echo("=" * 40) click.echo(f"Total AST tokens: {stats.get('total_tokens', 0)}") click.echo(f"Document structure: {stats.get('document_structure', 'unknown')}") click.echo() # Headings headings = stats.get('headings', {}) click.echo(f"Headings: {headings.get('total', 0)}") for level, count in headings.get('by_level', {}).items(): click.echo(f" {level.upper()}: {count}") click.echo(f"Paragraphs: {stats.get('paragraphs', 0)}") click.echo(f"Links: {stats.get('links', 0)}") # Lists lists = stats.get('lists', {}) total_lists = lists.get('ordered', 0) + lists.get('unordered', 0) click.echo(f"Lists: {total_lists}") if total_lists > 0: click.echo(f" Ordered: {lists.get('ordered', 0)}") click.echo(f" Unordered: {lists.get('unordered', 0)}") click.echo(f"Code blocks: {stats.get('code_blocks', 0)}") click.echo(f"Inline code: {stats.get('inline_code', 0)}") click.echo(f"Blockquotes: {stats.get('blockquotes', 0)}") # Emphasis emphasis = stats.get('emphasis', {}) click.echo(f"Strong text: {emphasis.get('strong', 0)}") click.echo(f"Italic text: {emphasis.get('italic', 0)}") elif format == 'json': import json click.echo(json.dumps(stats, indent=2, ensure_ascii=False)) elif format == 'yaml': import yaml click.echo(yaml.dump(stats, default_flow_style=False, allow_unicode=True)) elif format == 'simple': # Simple format - same as table but more concise click.echo("Document Statistics:") click.echo("=" * 40) click.echo(f"Total AST tokens: {stats.get('total_tokens', 0)}") click.echo(f"Document structure: {stats.get('document_structure', 'unknown')}") click.echo() # Headings headings = stats.get('headings', {}) click.echo(f"Headings: {headings.get('total', 0)}") for level, count in headings.get('by_level', {}).items(): click.echo(f" {level.upper()}: {count}") click.echo(f"Paragraphs: {stats.get('paragraphs', 0)}") click.echo(f"Links: {stats.get('links', 0)}") # Lists lists = stats.get('lists', {}) total_lists = lists.get('ordered', 0) + lists.get('unordered', 0) click.echo(f"Lists: {total_lists}") if total_lists > 0: click.echo(f" Ordered: {lists.get('ordered', 0)}") click.echo(f" Unordered: {lists.get('unordered', 0)}") click.echo(f"Code blocks: {stats.get('code_blocks', 0)}") click.echo(f"Inline code: {stats.get('inline_code', 0)}") click.echo(f"Blockquotes: {stats.get('blockquotes', 0)}") # Emphasis emphasis = stats.get('emphasis', {}) click.echo(f"Strong text: {emphasis.get('strong', 0)}") click.echo(f"Italic text: {emphasis.get('italic', 0)}") else: click.echo(f"Error: {result['message']}", err=True) sys.exit(1) except Exception as e: click.echo(f"AST statistics error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('schema-generate') @click.argument('file_path', type=click.Path(exists=True, path_type=Path)) @click.option('--max-depth', '-d', type=int, help='Maximum heading depth to include in schema') @click.option('--output', '-o', type=click.Path(path_type=Path), help='Output file path (default: stdout)') @click.option('--outfile', type=click.Path(path_type=Path), help='Output file path (alias for --output)') @click.option('--format', 'output_format', type=click.Choice(['json', 'yaml']), default='json', help='Output format') @click.option('--mode', type=click.Choice(['outline']), help='Generation mode: outline for structure-focused schemas') @click.option('--depth', type=int, help='Maximum depth for outline mode (similar to --max-depth)') @click.option('--capture-heading-text', is_flag=True, help='Capture exact heading text as schema constraints') @click.option('--include-content-instructions', is_flag=True, help='Include content field instructions for document generation') @click.option('--instruction-type', type=click.Choice(['description', 'example', 'constraint', 'template']), default='description', help='Type of content instructions to generate') @pass_config def generate_schema(config, file_path, max_depth, output, outfile, output_format, mode, depth, capture_heading_text, include_content_instructions, instruction_type): """ Generate a JSON schema from a markdown file's AST structure. FILE_PATH: Path to the markdown file to analyze Examples: markitect schema-generate document.md markitect schema-generate document.md --max-depth 2 markitect schema-generate document.md --output schema.json # Outline mode for structure-focused schemas markitect schema-generate --mode outline document.md markitect schema-generate --mode outline --depth 3 --outfile schema.json document.md # Heading text capture for validation constraints markitect schema-generate --capture-heading-text document.md markitect schema-generate --mode outline --capture-heading-text --depth 2 document.md # Content instructions for document generation guidance markitect schema-generate --include-content-instructions document.md markitect schema-generate --include-content-instructions --instruction-type example document.md markitect schema-generate --mode outline --include-content-instructions --instruction-type template document.md Modes: Default: Standard schema generation with structural analysis Outline: Structure-focused schema with heading text capture and metaschema extensions Heading Text Capture: When --capture-heading-text is enabled, the schema will include exact heading text as enum constraints, enabling validation to enforce specific heading text requirements. Content Instructions: When --include-content-instructions is enabled, the schema will include guidance fields for document generation. Use --instruction-type to specify the type of instructions: - description: Descriptive guidance for content authors - example: Example-based content guidance - constraint: Content constraint specifications - template: Template-based content structure """ try: # Handle parameter conflicts and defaults if outfile and output: click.echo("Error: Cannot specify both --output and --outfile", err=True) sys.exit(1) # Use outfile as output if specified final_output = outfile or output # Handle depth parameter for outline mode if mode == 'outline': if depth is not None and max_depth is not None: click.echo("Error: Cannot specify both --depth and --max-depth with outline mode", err=True) sys.exit(1) final_depth = depth if depth is not None else max_depth else: final_depth = max_depth # Validate depth parameter if final_depth is not None and final_depth < 1: click.echo("Invalid depth parameter: depth must be >= 1", err=True) sys.exit(1) # Initialize schema generator and associated files manager generator = SchemaGenerator() from .associated_files import AssociatedFilesManager associated_files = AssociatedFilesManager() # Generate schema with mode support schema = generator.generate_schema_from_file( file_path, max_depth=final_depth, mode=mode, outline_depth=depth if mode == 'outline' else None, capture_heading_text=capture_heading_text, include_content_instructions=include_content_instructions, instruction_type=instruction_type ) # Format output if output_format == 'json': formatted_output = json.dumps(schema, indent=2, ensure_ascii=False) elif output_format == 'yaml': formatted_output = yaml.dump(schema, default_flow_style=False, allow_unicode=True) else: formatted_output = json.dumps(schema, indent=2, ensure_ascii=False) # Mode-based output logic if not final_output and should_use_associated_files(): # Interactive mode: use associated file path from .associated_files import AssociatedFilesManager associated_files = AssociatedFilesManager() final_output = associated_files.get_associated_schema_path(file_path) if config.get('verbose'): click.echo(f"Interactive mode: using associated file path: {final_output}", err=True) # Write to output if final_output: final_output.write_text(formatted_output, encoding='utf-8') click.echo(f"Schema written to: {final_output}") # Show summary properties = schema.get('properties', {}) click.echo(f"Generated schema with {len(properties)} property types") if 'headings' in properties: heading_levels = len(properties['headings'].get('properties', {})) click.echo(f" - {heading_levels} heading levels found") structural_elements = ['paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables'] found_elements = [elem for elem in structural_elements if elem in properties] if found_elements: click.echo(f" - Structural elements: {', '.join(found_elements)}") else: click.echo(formatted_output) except FileNotFoundError as e: click.echo(f"File not found: {e}", err=True) sys.exit(1) except InvalidDepthError as e: click.echo(f"Invalid depth parameter: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"Schema generation error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('validate') @click.argument('file_path', type=click.Path(exists=True, path_type=Path)) @click.option('--schema', '-s', type=click.Path(exists=True, path_type=Path), help='Path to JSON schema file (.json or .md)') @click.option('--schema-json', type=str, help='JSON schema provided as a string') @click.option('--quiet', '-q', is_flag=True, help='Only output validation result (true/false)') @click.option('--detailed-errors', '--errors', is_flag=True, help='Show detailed validation errors (Issue #8)') @click.option('--error-format', type=click.Choice(['text', 'json', 'markdown']), default='text', help='Format for detailed error output') @click.option('--semantic/--no-semantic', default=True, help='Enable/disable semantic validation (sections, patterns, quality)') @click.option('--check-links', is_flag=True, help='Enable link validation (may be slow, requires --semantic)') @click.option('--strict', is_flag=True, help='Treat warnings as errors') @pass_config def validate(config, file_path, schema, schema_json, quiet, detailed_errors, error_format, semantic, check_links, strict): """ Validate a markdown file against a JSON schema. ENHANCED: Now includes semantic validation of x-markitect extensions: - Section classifications (required, recommended, optional, discouraged, improper) - Content patterns (required_patterns, forbidden_patterns) - Quality metrics (min_words, max_words, min_sentences) Checks if a markdown document strictly adheres to the structure defined by a specified schema. Returns boolean result (True/False). Examples: # Structural + semantic validation (default) markitect validate doc.md --schema manpage-schema-v1.0.md # Only structural validation markitect validate doc.md --schema schema.json --no-semantic # Strict mode (warnings become errors) markitect validate doc.md --schema schema.json --strict # Legacy detailed errors markitect validate doc.md --schema schema.json --detailed-errors """ try: validator = SchemaValidator() from .associated_files import AssociatedFilesManager associated_files = AssociatedFilesManager() # Validate schema source or auto-discover schema_sources = [schema, schema_json] provided_sources = [s for s in schema_sources if s is not None] if len(provided_sources) == 0: # Auto-discover associated schema file auto_schema = associated_files.find_associated_schema(file_path) if auto_schema: schema = auto_schema if config.get('verbose'): click.echo(f"Auto-discovered associated schema: {schema}", err=True) else: click.echo("Error: No schema specified and no associated schema file found", err=True) click.echo("Provide --schema FILE or --schema-json JSON, or ensure an associated .json file exists", err=True) sys.exit(1) elif len(provided_sources) > 1: click.echo("Error: Specify exactly one schema source (--schema or --schema-json)", err=True) sys.exit(1) # Load schema dict (supports .json and .md) schema_dict = None if schema: from .semantic_validator import load_schema_from_path schema_dict = load_schema_from_path(schema) schema_source = f"schema file: {schema}" elif schema_json: schema_dict = json.loads(schema_json) schema_source = "provided JSON schema" # Perform validation (with or without detailed errors) if detailed_errors: # Use detailed error reporting for Issue #8 error_collector = validator.validate_file_with_errors(file_path, schema_dict) is_valid = not error_collector.has_errors() # Output detailed errors if quiet: click.echo(str(is_valid).lower()) else: status = "VALID" if is_valid else "INVALID" click.echo(f"Validation result: {status}") click.echo(f"File: {file_path}") click.echo(f"Schema: {schema_source}") if is_valid: click.echo("āœ… Document structure matches schema requirements") else: click.echo("āŒ Document structure does not match schema requirements") click.echo() click.echo(error_collector.format_errors(error_format)) else: # Use simple boolean validation (original Issue #7 functionality) is_valid = validator.validate_file_against_schema(file_path, schema_dict) # Output results if quiet: click.echo(str(is_valid).lower()) else: status = "VALID" if is_valid else "INVALID" click.echo(f"Validation result: {status}") click.echo(f"File: {file_path}") click.echo(f"Schema: {schema_source}") if is_valid: click.echo("āœ… Document structure matches schema requirements") else: click.echo("āŒ Document structure does not match schema requirements") click.echo("šŸ’” Use --detailed-errors to see specific validation issues") # Semantic validation (if enabled and schema has x-markitect extensions) semantic_report = None if semantic and schema_dict: try: from .semantic_validator import SemanticValidator # Check if schema has x-markitect extensions has_extensions = ('x-markitect-sections' in schema_dict or 'x-markitect-content-control' in schema_dict) if has_extensions: sem_validator = SemanticValidator(schema_dict) semantic_report = sem_validator.validate(file_path, check_links=check_links) # Combine with structural validation result if semantic_report and not quiet: click.echo("") click.echo("=" * 60) click.echo("Semantic Validation Results:") click.echo("=" * 60) click.echo(semantic_report.format_text()) # Update validity based on semantic validation if semantic_report: if semantic_report.has_errors(): is_valid = False elif strict and semantic_report.has_warnings(): is_valid = False except Exception as e: # Semantic validation failure doesn't fail the whole command # unless strict mode is enabled if not quiet: click.echo(f"\nāš ļø Semantic validation error: {e}", err=True) if strict: is_valid = False # Exit with appropriate code sys.exit(0 if is_valid else 1) except FileNotFoundError as e: click.echo(f"File not found: {e}", err=True) sys.exit(1) except (InvalidSchemaError, SchemaValidationError) as e: click.echo(f"Schema validation error: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"Validation error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # Schema management commands for Issue #3 @cli.command('schema-ingest') @click.argument('schema_file', type=click.Path(exists=True, path_type=Path)) @click.option('--name', type=str, help='Custom name for the schema (default: filename)') @pass_config def schema_ingest(config, schema_file, name): """ Read and store a schema file in the database. Supports both JSON (.json) and Markdown (.md) schema files. Validates schemas against the MarkiTect metaschema to ensure compatibility with MarkiTect features like heading text capture and content instructions. SCHEMA_FILE: Path to the schema file to store (.json or .md) Examples: markitect schema-ingest my_schema.json markitect schema-ingest manpage-schema-v1.0.md markitect schema-ingest external_schema.json --name custom-name """ try: # Determine schema name schema_name = name if name else schema_file.name # Load schema based on file type if schema_file.suffix == '.md': # Load markdown schema from .schema_loader import MarkdownSchemaLoader loader = MarkdownSchemaLoader() try: schema_data_full = loader.load_schema(schema_file) schema_data = schema_data_full['schema'] # Store the JSON content for database schema_content = json.dumps(schema_data, indent=2) if config.get('verbose'): click.echo(f"āœ… Loaded markdown schema: {schema_file.name}") except Exception as e: click.echo(f"Error: Failed to load markdown schema - {e}", err=True) sys.exit(1) else: # Load JSON schema with open(schema_file, 'r', encoding='utf-8') as f: schema_content = f.read() # Validate JSON format try: schema_data = json.loads(schema_content) except json.JSONDecodeError as e: click.echo(f"Error: Invalid JSON in schema file - {e}", err=True) sys.exit(1) # Validate against MarkiTect metaschema from .metaschema import MetaschemaValidator try: metaschema_validator = MetaschemaValidator() validation_result = metaschema_validator.validate_schema_with_errors(schema_data) if not validation_result.is_valid: click.echo("āš ļø Schema validation warnings against MarkiTect metaschema:", err=True) for error in validation_result.errors: click.echo(f" - {error.message}", err=True) click.echo(" Schema will be stored but may not be fully compatible with MarkiTect features.", err=True) else: if config.get('verbose'): click.echo("āœ… Schema validates successfully against MarkiTect metaschema") except Exception as e: if config.get('verbose'): click.echo(f"āš ļø Could not validate against metaschema: {e}", err=True) # Initialize database and store schema from .database import DatabaseManager db_path = config.get('database', 'markitect.db') db_manager = DatabaseManager(db_path) db_manager.initialize_database() record_id = db_manager.store_schema_file(schema_name, schema_content) if record_id: title = schema_data.get('title', schema_name) description = schema_data.get('description', '') click.echo(f"āœ… Schema stored successfully") click.echo(f" Name: {schema_name}") click.echo(f" Title: {title}") if description: click.echo(f" Description: {description}") click.echo(f" Record ID: {record_id}") if config.get('verbose'): click.echo(f" Source file: {schema_file}") click.echo(f" Database: {db_path}") else: click.echo("āŒ Failed to store schema in database", err=True) sys.exit(1) except Exception as e: click.echo(f"Schema ingest error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('schema-list') @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @click.option('--names-only', is_flag=True, help='Show only schema names (no metadata)') @pass_config def schema_list(config, output_format, names_only): """ List all stored schema files. Shows metadata for all JSON schemas stored in the database, including their names, titles, descriptions, and timestamps. Examples: markitect schema-list markitect schema-list --format json markitect schema-list --format simple markitect schema-list --names-only """ try: from .database import DatabaseManager db_path = config.get('database', 'markitect.db') db_manager = DatabaseManager(db_path) schemas = db_manager.list_schema_files() if not schemas: click.echo("No schemas found in database.") return # Handle names-only option if names_only: for schema_info in schemas: click.echo(schema_info['filename']) return # Add numbering to all schemas (1-indexed) for idx, schema_info in enumerate(schemas, 1): schema_info['number'] = idx # Handle different output formats if output_format == 'simple': # Simple emoji format like the original list command click.echo(f"Found {len(schemas)} schema(s):") click.echo() for schema_info in schemas: # Format timestamp for display (remove microseconds) created = schema_info['created_at'] if created: # Format: YYYY-MM-DD HH:MM:SS (remove microseconds if present) if '.' in created: created_display = created.split('.')[0] else: created_display = created click.echo(f"[{schema_info['number']}] šŸ”§ {schema_info['filename']:<40} (added: {created_display})") else: click.echo(f"[{schema_info['number']}] šŸ”§ {schema_info['filename']}") if config.get('verbose'): click.echo(f" Title: {schema_info['title']}") click.echo(f" Created: {schema_info['created_at']}") click.echo(f" Updated: {schema_info['updated_at']}") if schema_info['description']: click.echo(f" Description: {schema_info['description']}") click.echo() elif output_format == 'table': # Custom table format for better readability table_data = [] for schema in schemas: # Format timestamps (remove microseconds) created_date = schema['created_at'].split('.')[0] if schema['created_at'] and '.' in schema['created_at'] else schema['created_at'] updated_date = schema['updated_at'].split('.')[0] if schema['updated_at'] and '.' in schema['updated_at'] else schema['updated_at'] table_data.append({ '#': schema['number'], 'Name': schema['filename'], 'Title': schema['title'] or '', 'Created': created_date or '', 'Updated': updated_date or '' }) if table_data: headers = ['#', 'Name', 'Title', 'Created', 'Updated'] rows = [[row[h] for h in headers] for row in table_data] click.echo(tabulate(rows, headers=headers, tablefmt='simple')) else: # Use structured format (json, yaml) formatted_output = format_output(schemas, output_format) click.echo(formatted_output) if config.get('verbose'): click.echo(f"\nTotal schemas: {len(schemas)}", err=True) except Exception as e: click.echo(f"Schema list error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('schema-get') @click.argument('schema_name', type=str) @click.option('--output', '-o', type=click.Path(path_type=Path), help='Output file path (default: stdout)') @pass_config def schema_get(config, schema_name, output): """ Retrieve and output a stored schema file. Fetches a JSON schema from the database by name and outputs its content either to stdout or to a specified file. SCHEMA_NAME: Name of the stored schema to retrieve Examples: markitect schema-get my_schema.json markitect schema-get my_schema.json --output exported_schema.json """ try: from .database import DatabaseManager db_path = config.get('database', 'markitect.db') db_manager = DatabaseManager(db_path) schema_data = db_manager.get_schema_file(schema_name) if not schema_data: click.echo(f"Error: Schema '{schema_name}' not found in database", err=True) sys.exit(1) schema_content = schema_data['schema_content'] # Output to file or stdout if output: with open(output, 'w', encoding='utf-8') as f: f.write(schema_content) click.echo(f"āœ… Schema exported to: {output}") if config.get('verbose'): click.echo(f" Title: {schema_data['title']}") click.echo(f" Description: {schema_data['description']}") else: click.echo(schema_content) except Exception as e: click.echo(f"Schema get error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('schema-delete') @click.argument('schema_name', type=str) @click.option('--confirm', is_flag=True, help='Skip confirmation prompt') @pass_config def schema_delete(config, schema_name, confirm): """ Delete a stored schema file from the database. Removes a JSON schema from the database permanently. This action cannot be undone. SCHEMA_NAME: Name of the stored schema to delete Examples: markitect schema-delete old_schema.json markitect schema-delete old_schema.json --confirm """ try: from .database import DatabaseManager db_path = config.get('database', 'markitect.db') db_manager = DatabaseManager(db_path) # Check if schema exists schema_data = db_manager.get_schema_file(schema_name) if not schema_data: click.echo(f"Error: Schema '{schema_name}' not found in database", err=True) sys.exit(1) # Confirmation prompt if not confirm: title = schema_data['title'] click.echo(f"Schema to delete:") click.echo(f" Name: {schema_name}") click.echo(f" Title: {title}") click.echo(f" Created: {schema_data['created_at']}") if not click.confirm("Are you sure you want to delete this schema?"): click.echo("Deletion cancelled.") return # Perform deletion success = db_manager.delete_schema_file(schema_name) if success: click.echo(f"āœ… Schema '{schema_name}' deleted successfully") else: click.echo(f"āŒ Failed to delete schema '{schema_name}'", err=True) sys.exit(1) except Exception as e: click.echo(f"Schema delete error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # Schema validation helper functions and dataclasses @dataclass class ValidationResult: """Result of validating a single schema.""" number: Optional[int] # Number in the list (if from registry) schema_name: str # Display name source_type: str # 'registry' or 'filesystem' is_valid: bool errors: List[str] title: Optional[str] = None version: Optional[str] = None schema_id: Optional[str] = None def is_filesystem_path(selector: str) -> bool: """Check if selector looks like a filesystem path. Args: selector: User input string Returns: True if selector appears to be a filesystem path """ return ( selector.startswith('./') or selector.startswith('../') or selector.startswith('/') or '/' in selector ) def parse_schema_selector(selector: str, schemas: List[dict]) -> List[str]: """Parse user input into list of schema filenames. Supports: - Single number: "1" - Number range: "1-3" - Number list: "1,3,5" - Keyword "all": returns all schemas - Filename: "manpage-schema-v1.0.md" Args: selector: User input string schemas: List of schema dicts with 'number' and 'filename' keys Returns: List of schema filenames Raises: ValueError: If selector format is invalid or numbers out of range """ if not selector or selector.lower() == 'all': return [s['filename'] for s in schemas] # Check if it looks like a filename (contains extension or is not a number/range) if not selector.replace(',', '').replace('-', '').replace(' ', '').isdigit(): # Assume it's a filename return [selector] # Parse number selection selected_numbers = set() # Handle comma-separated list: "1,3,5" parts = [part.strip() for part in selector.split(',')] for part in parts: if '-' in part: # Handle range: "1-3" try: start_str, end_str = part.split('-', 1) start = int(start_str.strip()) end = int(end_str.strip()) if start < 1 or end > len(schemas): raise ValueError( f"Range {start}-{end} is out of bounds. " f"Valid range: 1-{len(schemas)}" ) if start > end: raise ValueError(f"Invalid range: {start}-{end} (start > end)") selected_numbers.update(range(start, end + 1)) except ValueError as e: if "invalid literal" in str(e): raise ValueError(f"Invalid range format: '{part}'") raise else: # Handle single number: "1" try: num = int(part) if num < 1 or num > len(schemas): raise ValueError( f"Number {num} is out of bounds. " f"Valid range: 1-{len(schemas)}" ) selected_numbers.add(num) except ValueError as e: if "invalid literal" in str(e): raise ValueError(f"Invalid number: '{part}'") raise # Convert numbers to filenames number_to_filename = {s['number']: s['filename'] for s in schemas} return [number_to_filename[num] for num in sorted(selected_numbers)] def resolve_schema_source(identifier: str, db_manager: DatabaseManager) -> Tuple[str, dict, str]: """Resolve schema identifier to its source. Resolution order: 1. Check registry by exact filename match 2. If looks like path or not found in registry, try filesystem Args: identifier: Schema filename or path db_manager: Database manager instance Returns: Tuple of (source_type, schema_data, display_name) - source_type: 'registry' or 'filesystem' - schema_data: Dict with schema content or Path object - display_name: Human-readable name for display Raises: FileNotFoundError: If schema not found in registry or filesystem """ # First, try registry (exact filename match) schema_data = db_manager.get_schema_file(identifier) if schema_data: return ('registry', schema_data, identifier) # If not found in registry, try filesystem # (either because it looks like a path or as a fallback) schema_path = Path(identifier) if schema_path.exists(): return ('filesystem', {'path': schema_path}, str(schema_path)) # Not found anywhere raise FileNotFoundError( f"Schema '{identifier}' not found in registry or filesystem. " f"Use 'markitect schema-list' to see available schemas." ) def format_validation_summary(results: List[ValidationResult]) -> str: """Format batch validation results as a table. Args: results: List of ValidationResult objects Returns: Formatted table string """ if not results: return "No validation results." # Build table data table_data = [] for result in results: # Number column (if available) num_str = str(result.number) if result.number else '-' # Status column status = 'āœ… Valid' if result.is_valid else 'āŒ Failed' # Details column if result.is_valid: details = f"v{result.version}" if result.version else 'OK' else: error_count = len(result.errors) details = f"{error_count} error{'s' if error_count != 1 else ''}" table_data.append([num_str, result.schema_name, status, details]) # Format as table headers = ['#', 'Schema', 'Status', 'Details'] table = tabulate(table_data, headers=headers, tablefmt='simple') return table @cli.command('schema-validate') @click.argument('schema_selector', type=str, required=False) @click.option('--all', 'validate_all', is_flag=True, help='Validate all registered schemas') @click.option('--detailed-errors', is_flag=True, help='Show detailed validation errors') @pass_config def schema_validate_cmd(config, schema_selector, validate_all, detailed_errors): """ Validate schema file(s) against the schema-for-schemas metaschema. Ensures schema files follow MarkiTect conventions and standards: - Required fields ($schema, $id, title, description, version) - Version format (SemVer: major.minor.patch) - $id URL format (HTTPS with version) - MarkiTect extensions (x-markitect-*) - Section classification structures SCHEMA_SELECTOR: Schema selection (optional): - Number: "1" - Range: "1-3" - List: "1,3,5" - Filename: "manpage-schema-v1.0.md" - Path: "./my-schema.md" - Keyword: "all" If no selector provided and --all not specified, shows usage help. Examples: markitect schema-validate 1 markitect schema-validate 1-3 markitect schema-validate 1,3,5 markitect schema-validate --all markitect schema-validate manpage-schema-v1.0.md markitect schema-validate ./my-schema.md --detailed-errors """ try: from .schema_loader import MarkdownSchemaLoader try: import jsonschema from jsonschema import Draft7Validator, ValidationError except ImportError: click.echo("āŒ Error: jsonschema package not installed", err=True) click.echo("Install it with: pip install jsonschema", err=True) sys.exit(1) # Determine what to validate if validate_all: selector = 'all' elif schema_selector: selector = schema_selector else: click.echo("āŒ Error: No schema specified", err=True) click.echo("\nUsage:") click.echo(" markitect schema-validate 1 # Validate schema #1") click.echo(" markitect schema-validate 1-3 # Validate schemas #1-3") click.echo(" markitect schema-validate 1,3,5 # Validate schemas #1,3,5") click.echo(" markitect schema-validate --all # Validate all schemas") click.echo(" markitect schema-validate schema.md # Validate by filename") click.echo(" markitect schema-validate ./schema.md # Validate by path") click.echo("\nUse 'markitect schema-list' to see available schemas.") sys.exit(1) db_path = config.get('database', 'markitect.db') db_manager = DatabaseManager(db_path) loader = MarkdownSchemaLoader() # Load metaschema once metaschema_path = Path(__file__).parent / 'schemas' / 'schema-schema-v1.0.md' if not metaschema_path.exists(): click.echo(f"āŒ Metaschema not found: {metaschema_path}", err=True) sys.exit(1) try: metaschema_data = loader.load_schema(metaschema_path) metaschema = metaschema_data['schema'] except Exception as e: click.echo(f"āŒ Failed to load metaschema: {e}", err=True) sys.exit(1) # Resolve which schemas to validate schemas_to_validate = [] # Check if selector is a filesystem path if selector != 'all' and is_filesystem_path(selector): # Direct filesystem path - validate single file schema_path = Path(selector) if not schema_path.exists(): click.echo(f"āŒ File not found: {selector}", err=True) sys.exit(1) schemas_to_validate.append({ 'identifier': selector, 'number': None, 'source_type': 'filesystem' }) else: # Number/range/filename - get registry list and parse all_schemas = db_manager.list_schema_files() if not all_schemas: click.echo("āŒ No schemas found in registry", err=True) click.echo("Use 'markitect schema-ingest' to add schemas first.", err=True) sys.exit(1) # Add numbering for idx, schema_info in enumerate(all_schemas, 1): schema_info['number'] = idx # Parse selector try: selected_filenames = parse_schema_selector(selector, all_schemas) except ValueError as e: click.echo(f"āŒ Invalid selector: {e}", err=True) sys.exit(1) # Build list of schemas to validate filename_to_number = {s['filename']: s['number'] for s in all_schemas} for filename in selected_filenames: schemas_to_validate.append({ 'identifier': filename, 'number': filename_to_number.get(filename), 'source_type': 'registry' }) # Validate schemas results = [] validator = Draft7Validator(metaschema) # Show progress for multiple schemas if len(schemas_to_validate) > 1: click.echo(f"Validating {len(schemas_to_validate)} schema(s)...\n") for schema_info in schemas_to_validate: identifier = schema_info['identifier'] number = schema_info['number'] source_type = schema_info['source_type'] try: # Resolve and load schema if source_type == 'filesystem': schema_path = Path(identifier) if schema_path.suffix == '.md': schema_data = loader.load_schema(schema_path) schema = schema_data['schema'] else: schema = json.loads(schema_path.read_text()) display_name = str(schema_path) else: # From registry source_type, schema_data, display_name = resolve_schema_source( identifier, db_manager ) if source_type == 'registry': schema = json.loads(schema_data['schema_content']) else: # Fallback to filesystem schema_path = schema_data['path'] if schema_path.suffix == '.md': loaded = loader.load_schema(schema_path) schema = loaded['schema'] else: schema = json.loads(schema_path.read_text()) # Validate errors = list(validator.iter_errors(schema)) # Create result result = ValidationResult( number=number, schema_name=display_name, source_type=source_type, is_valid=(len(errors) == 0), errors=[error.message for error in errors], title=schema.get('title'), version=schema.get('version'), schema_id=schema.get('$id') ) results.append(result) except FileNotFoundError as e: # Schema not found result = ValidationResult( number=number, schema_name=identifier, source_type=source_type, is_valid=False, errors=[str(e)] ) results.append(result) except Exception as e: # Other error result = ValidationResult( number=number, schema_name=identifier, source_type=source_type, is_valid=False, errors=[f"Failed to load: {e}"] ) results.append(result) # Display results if len(results) == 1: # Single schema - detailed output (backward compatible) result = results[0] if result.is_valid: click.echo(f"āœ… Schema is valid: {result.schema_name}") if result.title: click.echo(f" Title: {result.title}") if result.version: click.echo(f" Version: {result.version}") if result.schema_id: click.echo(f" $id: {result.schema_id}") else: click.echo(f"āŒ Schema validation failed: {result.schema_name}", err=True) click.echo(f"\nFound {len(result.errors)} validation error(s):\n", err=True) for i, error_msg in enumerate(result.errors, 1): click.echo(f"{i}. {error_msg}", err=True) sys.exit(1) else: # Multiple schemas - summary table click.echo("Results:\n") click.echo(format_validation_summary(results)) # Summary counts valid_count = sum(1 for r in results if r.is_valid) failed_count = len(results) - valid_count click.echo(f"\nSummary: {valid_count} valid, {failed_count} failed") # Show failed details if failed_count > 0: click.echo("\nFailed schemas:") for result in results: if not result.is_valid: num_str = f"{result.number}. " if result.number else "" click.echo(f" {num_str}{result.schema_name}", err=True) for error_msg in result.errors[:3]: # Show first 3 errors click.echo(f" - {error_msg}", err=True) if len(result.errors) > 3: click.echo(f" ... and {len(result.errors) - 3} more", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Schema validation error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('schema-analyze') @click.argument('schema_file', type=click.Path(exists=True)) @click.option('--verbose', '-v', is_flag=True, help='Show detailed analysis') @pass_config def schema_analyze_cmd(config, schema_file, verbose): """ Analyze a schema for rigidity issues and suggest improvements. Examines JSON schemas to detect: - Exact counts that should be ranges - Missing classification system - Deprecated extensions - Overly specific constraints Returns exit code 0 for flexible schemas, 1 for rigid schemas, 2 for errors. Examples: markitect schema-analyze schema.json markitect schema-analyze schema.json --verbose """ from .schema_analyzer import analyze_schema_cli sys.exit(analyze_schema_cli(schema_file, verbose=verbose)) @cli.command('schema-refine') @click.argument('schema_file', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output file (default: overwrite input file)') @click.option('--loosen-counts', is_flag=True, default=True, help='Convert exact counts to flexible ranges (default: enabled)') @click.option('--no-loosen-counts', is_flag=True, help='Disable count loosening') @click.option('--round-numbers', is_flag=True, default=True, help='Round overly specific numbers (default: enabled)') @click.option('--no-round-numbers', is_flag=True, help='Disable number rounding') @click.option('--migrate-deprecated', is_flag=True, default=False, help='Migrate deprecated extensions (requires manual review)') @click.option('--dry-run', is_flag=True, help='Show changes without applying them') @click.option('--interactive', '-i', is_flag=True, help='Prompt for each refinement interactively') @pass_config def schema_refine_cmd(config, schema_file, output, loosen_counts, no_loosen_counts, round_numbers, no_round_numbers, migrate_deprecated, dry_run, interactive): """ Refine a schema by automatically applying fixes for rigidity issues. This command analyzes the schema and applies automatic fixes: - Converts exact counts to flexible ranges - Rounds overly specific numbers - Widens narrow integer constraints - Documents deprecated extension usage By default, the input file is overwritten. Use --output to save to a different file. Examples: # Refine schema in place markitect schema-refine schema.json # Preview changes without applying markitect schema-refine schema.json --dry-run # Review each fix interactively markitect schema-refine schema.json --interactive # Save refined schema to new file markitect schema-refine schema.json --output refined-schema.json # Disable specific refinements markitect schema-refine schema.json --no-loosen-counts """ from .schema_refiner import refine_schema_cli # Handle flag conflicts loosen = loosen_counts and not no_loosen_counts round_nums = round_numbers and not no_round_numbers sys.exit(refine_schema_cli( schema_file, output=output, loosen_counts=loosen, migrate_deprecated=migrate_deprecated, round_numbers=round_nums, dry_run=dry_run, interactive=interactive )) @cli.command('generate-stub') @click.argument('schema_file', type=click.Path(exists=True, path_type=Path)) @click.option('--output', '-o', type=click.Path(path_type=Path), help='Output file path (default: stdout)') @click.option('--style', type=click.Choice(['default', 'custom', 'detailed']), default='default', help='Placeholder content style') @click.option('--title', type=str, help='Custom document title') @pass_config def generate_stub(config, schema_file, output, style, title): """ Generate a markdown stub/template from a JSON schema. Creates a markdown document with proper heading hierarchy and placeholder content based on the structural definitions in the JSON schema. When schemas include content instructions (x-markitect-content-instructions), the generated stub will use specific guidance text instead of generic placeholders. SCHEMA_FILE: Path to the JSON schema file Examples: markitect generate-stub blog_schema.json markitect generate-stub schema.json --output template.md markitect generate-stub schema.json --style detailed --title "My Document" # Content instructions will be used automatically when present in schema markitect generate-stub schema_with_instructions.json Content Instructions: When a schema contains x-markitect-content-instructions-enabled: true, the generated stub will include specific content guidance from the schema instead of generic "TODO" placeholders. This is especially useful with schemas created using the --include-content-instructions option. Schema Reference: Generated stubs include a comment referencing the source schema file for validation and traceability purposes. """ try: if config.get('verbose'): click.echo(f"Generating stub from schema: {schema_file}", err=True) from .stub_generator import StubGenerator from .associated_files import AssociatedFilesManager generator = StubGenerator() associated_files = AssociatedFilesManager() # Load schema (supports .json and .md) from .semantic_validator import load_schema_from_path schema = load_schema_from_path(schema_file) stub_content = generator.generate_stub_from_schema( schema, placeholder_style=style, title=title, schema_file_path=schema_file ) # Mode-based output logic if not output and should_use_associated_files(): # Interactive mode: use associated file path output = associated_files.get_associated_markdown_path(schema_file) if config.get('verbose'): click.echo(f"Interactive mode: using associated file path: {output}", err=True) # Output to file or stdout if output: generator.generate_stub_to_file(schema, output, style, title, schema_file) click.echo(f"āœ… Stub generated: {output}") if config.get('verbose'): click.echo(f"Generated markdown template saved to: {output}", err=True) else: click.echo(stub_content) if config.get('verbose'): click.echo(f"Generated {len(stub_content)} characters of content", err=True) except FileNotFoundError as e: click.echo(f"Error: {e}", err=True) sys.exit(1) except json.JSONDecodeError as e: click.echo(f"Error: Invalid JSON in schema file - {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"Stub generation error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('generate-drafts') @click.argument('schema_file', type=click.Path(exists=True, path_type=Path)) @click.argument('data_source', type=click.Path(exists=True, path_type=Path)) @click.option('--output-dir', '-o', type=click.Path(path_type=Path), required=True, help='Output directory for generated drafts') @pass_config def generate_drafts(config, schema_file, data_source, output_dir): """ Generate multiple document drafts from a schema and data source. Creates multiple markdown documents by combining a JSON schema template with data from JSON or CSV sources. Each record in the data source generates a separate draft file with field mapping applied. SCHEMA_FILE: Path to the JSON schema file DATA_SOURCE: Path to JSON or CSV data source file Examples: markitect generate-drafts schema.json data.json -o ./drafts/ markitect generate-drafts blog_schema.json posts.csv -o ./blog_posts/ Field Mapping: Use x-markitect-field-mapping extension in schema to map data fields to content areas. Data validation ensures compatibility. Output: Generated drafts maintain schema references for validation and use automatic file naming based on data content. """ try: if config.get('verbose'): click.echo(f"Generating drafts from schema: {schema_file}", err=True) click.echo(f"Using data source: {data_source}", err=True) click.echo(f"Output directory: {output_dir}", err=True) from .draft_generator import DraftGenerator generator = DraftGenerator() # Load schema import json with open(schema_file, 'r') as f: schema = json.load(f) # Generate drafts generated_files = generator.generate_drafts_from_data_source( schema=schema, data_source=data_source, output_dir=Path(output_dir), schema_file_path=str(schema_file) ) # Report results click.echo(f"āœ… Generated {len(generated_files)} drafts in {output_dir}") if config.get('verbose'): for file_path in generated_files: click.echo(f" - {file_path}", err=True) except FileNotFoundError as e: click.echo(f"Error: {e}", err=True) sys.exit(1) except json.JSONDecodeError as e: click.echo(f"Error: Invalid JSON in schema file - {e}", err=True) sys.exit(1) except ValueError as e: click.echo(f"Error: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"Draft generation error: {e}", err=True) if config and config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.group('associated-files') @pass_config def associated_files_group(config): """ Manage associated markdown and schema file pairs. Commands for working with files that follow the convention of having identical basenames with different extensions (e.g., document.md ↔ document.json). """ pass @associated_files_group.command('list') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @click.argument('directory', type=click.Path(exists=True, file_okay=False, path_type=Path), default='.') @pass_config def list_associated_files(config, format, directory): """ List all associated file pairs in a directory. Shows markdown/schema file pairs that follow the naming convention. Examples: markitect associated-files list markitect associated-files list docs/ markitect associated-files list --format json """ try: from .associated_files import AssociatedFilesManager manager = AssociatedFilesManager() if config.get('verbose'): click.echo(f"Scanning directory: {directory}", err=True) pairs = manager.list_file_pairs(directory) if not pairs: click.echo("No associated file pairs found.") return # Format output if format == 'table': click.echo(f"Associated File Pairs in {directory}:") click.echo("=" * 60) for pair in pairs: click.echo(f"šŸ“„ {pair['basename']}") click.echo(f" Markdown: {pair['markdown_file'].name}") click.echo(f" Schema: {pair['schema_file'].name}") click.echo() elif format == 'json': import json output_data = [] for pair in pairs: output_data.append({ 'basename': pair['basename'], 'markdown_file': str(pair['markdown_file']), 'schema_file': str(pair['schema_file']), 'both_exist': pair['both_exist'] }) click.echo(json.dumps(output_data, indent=2)) elif format == 'yaml': import yaml output_data = [] for pair in pairs: output_data.append({ 'basename': pair['basename'], 'markdown_file': str(pair['markdown_file']), 'schema_file': str(pair['schema_file']), 'both_exist': pair['both_exist'] }) click.echo(yaml.dump(output_data, default_flow_style=False)) else: # Simple format for pair in pairs: click.echo(f"{pair['basename']} ({pair['markdown_file'].name} ↔ {pair['schema_file'].name})") if config.get('verbose'): click.echo(f"Found {len(pairs)} associated file pairs", err=True) except Exception as e: click.echo(f"Error listing associated files: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @associated_files_group.command('info') @click.argument('file_path', type=click.Path(exists=True, path_type=Path)) @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def associated_files_info(config, file_path, format): """ Show detailed information about associated files. Displays information about a file and its associated counterpart. Examples: markitect associated-files info document.md markitect associated-files info schema.json --format json """ try: from .associated_files import AssociatedFilesManager manager = AssociatedFilesManager() info = manager.get_file_pair_info(file_path) if format == 'table': click.echo(f"Associated Files Information:") click.echo("=" * 40) click.echo(f"Basename: {info['basename']}") click.echo(f"Markdown: {info['markdown_file']}") click.echo(f" Exists: {'āœ…' if info['markdown_file'].exists() else 'āŒ'}") if 'markdown_size' in info: click.echo(f" Size: {info['markdown_size']} bytes") click.echo(f"Schema: {info['schema_file']}") click.echo(f" Exists: {'āœ…' if info['schema_file'].exists() else 'āŒ'}") if 'schema_size' in info: click.echo(f" Size: {info['schema_size']} bytes") click.echo(f"Both exist: {'āœ…' if info['both_exist'] else 'āŒ'}") elif format == 'json': import json # Convert Path objects to strings for JSON serialization json_info = {k: str(v) if isinstance(v, Path) else v for k, v in info.items()} click.echo(json.dumps(json_info, indent=2)) elif format == 'yaml': import yaml # Convert Path objects to strings for YAML serialization yaml_info = {k: str(v) if isinstance(v, Path) else v for k, v in info.items()} click.echo(yaml.dump(yaml_info, default_flow_style=False)) else: # Simple format status = "paired" if info['both_exist'] else "orphaned" click.echo(f"{info['basename']}: {status}") except Exception as e: click.echo(f"Error getting file info: {e}", err=True) sys.exit(1) @associated_files_group.command('status') @click.argument('directory', type=click.Path(exists=True, file_okay=False, path_type=Path), default='.') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def associated_files_status(config, directory, format): """ Show status of associated files in a directory. Displays paired files and orphaned files (files without their counterpart). Examples: markitect associated-files status markitect associated-files status docs/ """ try: from .associated_files import AssociatedFilesManager manager = AssociatedFilesManager() status = manager.get_directory_status(directory) if format == 'table': click.echo(f"Associated Files Status for {directory}:") click.echo("=" * 50) click.echo(f"šŸ“Œ Paired files: {status['paired_files']}") click.echo(f"šŸ“„ Orphaned markdown: {status['orphaned_markdown']}") click.echo(f"šŸ”§ Orphaned schemas: {status['orphaned_schemas']}") if status['pairs']: click.echo("\nšŸ“Œ Paired Files:") for pair in status['pairs']: click.echo(f" • {pair['basename']}") if status['orphaned']['orphaned_markdown']: click.echo("\nšŸ“„ Orphaned Markdown Files:") for md in status['orphaned']['orphaned_markdown']: click.echo(f" • {md.name}") if status['orphaned']['orphaned_schemas']: click.echo("\nšŸ”§ Orphaned Schema Files:") for schema in status['orphaned']['orphaned_schemas']: click.echo(f" • {schema.name}") elif format == 'json': import json # Convert Path objects to strings for JSON serialization json_status = { 'directory': str(status['directory']), 'paired_files': status['paired_files'], 'orphaned_markdown': status['orphaned_markdown'], 'orphaned_schemas': status['orphaned_schemas'], 'pairs': [{'basename': p['basename'], 'markdown_file': str(p['markdown_file']), 'schema_file': str(p['schema_file'])} for p in status['pairs']], 'orphaned': { 'orphaned_markdown': [str(f) for f in status['orphaned']['orphaned_markdown']], 'orphaned_schemas': [str(f) for f in status['orphaned']['orphaned_schemas']] } } click.echo(json.dumps(json_status, indent=2)) elif format == 'yaml': import yaml yaml_status = { 'directory': str(status['directory']), 'paired_files': status['paired_files'], 'orphaned_markdown': status['orphaned_markdown'], 'orphaned_schemas': status['orphaned_schemas'], 'pairs': [{'basename': p['basename'], 'markdown_file': str(p['markdown_file']), 'schema_file': str(p['schema_file'])} for p in status['pairs']], 'orphaned': { 'orphaned_markdown': [str(f) for f in status['orphaned']['orphaned_markdown']], 'orphaned_schemas': [str(f) for f in status['orphaned']['orphaned_schemas']] } } click.echo(yaml.dump(yaml_status, default_flow_style=False)) else: # Simple format click.echo(f"Paired: {status['paired_files']}, Orphaned: {status['orphaned_markdown'] + status['orphaned_schemas']}") except Exception as e: click.echo(f"Error getting status: {e}", err=True) sys.exit(1) @associated_files_group.command('create-schema') @click.argument('markdown_file', type=click.Path(exists=True, path_type=Path)) @click.option('--max-depth', '-d', type=int, help='Maximum heading depth to include in schema') @pass_config def create_associated_schema(config, markdown_file, max_depth): """ Create an associated schema file for a markdown file. Generates a JSON schema and places it next to the source markdown file with the same basename but .json extension. Examples: markitect associated-files create-schema document.md markitect associated-files create-schema doc.md --max-depth 3 """ try: from .associated_files import AssociatedFilesManager from .schema_generator import SchemaGenerator manager = AssociatedFilesManager() generator = SchemaGenerator() # Check if associated schema already exists existing_schema = manager.find_associated_schema(markdown_file) if existing_schema: if not click.confirm(f"Associated schema {existing_schema} already exists. Overwrite?"): click.echo("Operation cancelled.") return # Generate schema schema = generator.generate_schema_from_file(markdown_file, max_depth=max_depth) # Save to associated path schema_path = manager.get_associated_schema_path(markdown_file) import json with open(schema_path, 'w', encoding='utf-8') as f: json.dump(schema, f, indent=2, ensure_ascii=False) click.echo(f"āœ… Created associated schema: {schema_path}") if config.get('verbose'): properties = schema.get('properties', {}) click.echo(f"Generated schema with {len(properties)} property types", err=True) except Exception as e: click.echo(f"Error creating schema: {e}", err=True) sys.exit(1) @associated_files_group.command('create-stub') @click.argument('schema_file', type=click.Path(exists=True, path_type=Path)) @click.option('--style', type=click.Choice(['default', 'custom', 'detailed']), default='default', help='Placeholder content style') @click.option('--title', type=str, help='Custom document title') @pass_config def create_associated_stub(config, schema_file, style, title): """ Create an associated markdown stub for a schema file. Generates a markdown template and places it next to the source schema file with the same basename but .md extension. Examples: markitect associated-files create-stub schema.json markitect associated-files create-stub schema.json --style detailed """ try: from .associated_files import AssociatedFilesManager from .stub_generator import StubGenerator manager = AssociatedFilesManager() generator = StubGenerator() # Check if associated markdown already exists existing_md = manager.find_associated_markdown(schema_file) if existing_md: if not click.confirm(f"Associated markdown {existing_md} already exists. Overwrite?"): click.echo("Operation cancelled.") return # Load schema and generate stub import json with open(schema_file, 'r') as f: schema = json.load(f) # Save to associated path md_path = manager.get_associated_markdown_path(schema_file) generator.generate_stub_to_file(schema, md_path, style, title) click.echo(f"āœ… Created associated stub: {md_path}") if config.get('verbose'): content = md_path.read_text() click.echo(f"Generated {len(content)} characters of content", err=True) except Exception as e: click.echo(f"Error creating stub: {e}", err=True) sys.exit(1) @cli.command('db-delete') @click.option('--force', is_flag=True, help='Delete without confirmation prompt') @click.option('--database', type=click.Path(), help='Database file path (overrides global setting)') @pass_config def db_delete(config, force, database): """ Delete the database file. WARNING: This operation cannot be undone. All stored data will be lost. Examples: markitect db-delete markitect db-delete --force markitect db-delete --database /path/to/db.sqlite --force """ try: # Use command-specific database option or fall back to global config if database: db_path = Path(database) else: db_path = Path(config.get('database_path', os.path.expanduser('~/.markitect/markitect.db'))) if not db_path.exists(): click.echo(f"Database file not found: {db_path}") return if not force: if not click.confirm(f"āš ļø Are you sure you want to delete the database at {db_path}?\nThis action cannot be undone."): click.echo("Operation cancelled.") return # Delete the database file db_path.unlink() click.echo(f"āœ… Database deleted: {db_path}") if config.get('verbose'): click.echo("All stored data has been permanently removed.", err=True) except Exception as e: click.echo(f"Error deleting database: {e}", err=True) sys.exit(1) @cli.command('db-stats') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @click.option('--database', type=click.Path(), help='Database file path (overrides global setting)') @pass_config def db_stats(config, format, database): """ Show database statistics and information. Display database size and basic information. For detailed table analysis, use existing database commands after ensuring the database is accessible. Renamed from db-status for consistency with subsystem naming convention. Examples: markitect db-stats markitect db-stats --format json markitect db-stats --database /path/to/db.sqlite """ try: # Use command-specific database option or fall back to global config if database: db_path = Path(database) else: db_path = Path(config.get('database_path', os.path.expanduser('~/.markitect/markitect.db'))) if not db_path.exists(): if format == 'json': click.echo('{"error": "Database not found", "path": "' + str(db_path) + '"}') elif format == 'yaml': click.echo(f'error: Database not found\npath: {db_path}') else: click.echo(f"Database file not found: {db_path}") return # Basic file information (no database connection needed) file_size = db_path.stat().st_size stats = { 'database_path': str(db_path), 'exists': True, 'size_bytes': file_size, 'size_human': format_file_size(file_size), 'status': 'accessible' if db_path.is_file() else 'inaccessible' } # Format and display statistics formatted_output = format_output(stats, format) click.echo(formatted_output) if config.get('verbose'): click.echo(f"Database status retrieved successfully", err=True) except Exception as e: click.echo(f"Error getting database status: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command('db-data') @click.argument('file_path', type=str) @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def db_data(config, file_path, format): """ Display complete file data including metadata, frontmatter, and content. Show comprehensive information about a specific file including its front matter, database metadata, and processing information. This is the new name for what was previously called 'metadata'. FILE_PATH: Name of the file to display data for Examples: markitect db-data README.md markitect db-data docs/guide.md --format json markitect db-data config.md --format yaml """ try: if config['verbose']: click.echo(f"Retrieving complete data for: {file_path}", err=True) db_manager = config['db_manager'] # Get file information from database file_info = db_manager.get_markdown_file(file_path) if not file_info: click.echo(f"File not found in database: {file_path}", err=True) click.echo("Use 'markitect ingest' to process the file first.", err=True) sys.exit(1) # Parse front matter for better display if file_info.get('front_matter'): try: if isinstance(file_info['front_matter'], str): file_info['front_matter'] = eval(file_info['front_matter']) except (ValueError, TypeError, SyntaxError): if config['verbose']: click.echo("Warning: Could not parse front matter", err=True) # Format and display complete data formatted_output = format_output(file_info, format) click.echo(formatted_output) if config['verbose']: content_length = len(file_info.get('content', '')) click.echo(f"Content length: {content_length} characters", err=True) except Exception as e: click.echo(f"Error retrieving file data: {e}", err=True) if config['verbose']: import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) def format_file_size(size_bytes): """Format file size in human-readable format.""" if size_bytes < 1024: return f"{size_bytes} B" elif size_bytes < 1024 * 1024: return f"{size_bytes / 1024:.1f} KB" elif size_bytes < 1024 * 1024 * 1024: return f"{size_bytes / (1024 * 1024):.1f} MB" else: return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB" # Legacy Agent Management Commands # ================================= # Comprehensive CLI interface for managing legacy interface lifecycle @cli.group('legacy') def legacy_management(): """ Manage legacy interface compatibility and lifecycle. Provides comprehensive tools for analyzing, managing, and cleaning up legacy interfaces including deprecation progression, migration assistance, and automated maintenance. """ if not LEGACY_SYSTEM_AVAILABLE: click.echo("Error: Legacy management system not available", err=True) click.echo("Install with: pip install markitect[legacy]", err=True) sys.exit(1) @legacy_management.command('status') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default='table', help='Output format') @click.option('--include-removed', is_flag=True, help='Include removed interfaces') @pass_config def legacy_status(config, format, include_removed): """ Show status of all legacy interfaces. Displays comprehensive information about all registered legacy interfaces including their current status, deprecation dates, and removal schedules. Examples: markitect legacy status markitect legacy status --format json markitect legacy status --include-removed """ try: registry = LegacyRegistry() # Get all legacy interfaces interfaces = [] for command in registry._interfaces: for version, interface in registry._interfaces[command].items(): if not include_removed and interface.status == LegacyStatus.REMOVED: continue interfaces.append({ 'command': interface.command, 'version': interface.version, 'status': interface.status.value, 'deprecated_date': interface.deprecated_date, 'removal_date': interface.removal_date, 'git_commit': interface.git_commit[:8] if interface.git_commit else 'N/A', 'description': interface.description or 'No description' }) if format == 'json': click.echo(json.dumps(interfaces, indent=2)) elif format == 'yaml': import yaml click.echo(yaml.dump(interfaces, default_flow_style=False)) elif format == 'simple': for interface in interfaces: status_icon = { 'current': 'āœ…', 'deprecated': 'āš ļø', 'legacy': 'šŸ”„', 'sunset': 'šŸŒ…', 'removed': 'āŒ' }.get(interface['status'], 'ā“') click.echo(f"{status_icon} {interface['command']} {interface['version']} ({interface['status']})") else: # Table format if interfaces: headers = ['Command', 'Version', 'Status', 'Deprecated', 'Removal', 'Commit', 'Description'] rows = [[ i['command'], i['version'], i['status'], i['deprecated_date'][:10] if i['deprecated_date'] else 'N/A', i['removal_date'][:10] if i['removal_date'] else 'N/A', i['git_commit'], i['description'][:30] + '...' if len(i['description']) > 30 else i['description'] ] for i in interfaces] click.echo(tabulate(rows, headers=headers, tablefmt='grid')) else: click.echo("No legacy interfaces found.") if config.get('verbose'): total = len(interfaces) by_status = {} for interface in interfaces: status = interface['status'] by_status[status] = by_status.get(status, 0) + 1 click.echo(f"\nSummary: {total} interfaces", err=True) for status, count in by_status.items(): click.echo(f" {status}: {count}", err=True) except Exception as e: click.echo(f"Error getting legacy status: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @legacy_management.command('analyze') @click.argument('command', required=False) @click.argument('version', required=False) @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'detailed']), default='detailed', help='Output format') @pass_config def legacy_analyze(config, command, version, format): """ Analyze legacy interfaces for needed actions. Performs comprehensive analysis of legacy interfaces to identify deprecation candidates, migration opportunities, and cleanup needs. Examples: markitect legacy analyze markitect legacy analyze query markitect legacy analyze query v1.0 """ try: registry = LegacyRegistry() agent = LegacyAgent(registry=registry) if command and version: # Analyze specific interface interface = registry.get_legacy_interface(command, version) if not interface: click.echo(f"Legacy interface {command} {version} not found", err=True) sys.exit(1) analysis = { 'command': interface.command, 'version': interface.version, 'current_status': interface.status.value, 'deprecated_date': interface.deprecated_date, 'removal_date': interface.removal_date, 'git_commit': interface.git_commit, 'breaking_changes': interface.breaking_changes, 'migration_guide_available': bool(interface.migration_guide), 'recommendations': [] } # Add recommendations based on status if interface.status == LegacyStatus.DEPRECATED: analysis['recommendations'].append("Consider progressing to LEGACY status") elif interface.status == LegacyStatus.LEGACY: analysis['recommendations'].append("Monitor usage and prepare for SUNSET") elif interface.status == LegacyStatus.SUNSET: analysis['recommendations'].append("Schedule final removal") if not interface.migration_guide: analysis['recommendations'].append("Generate migration guide") if format == 'json': click.echo(json.dumps(analysis, indent=2)) elif format == 'yaml': import yaml click.echo(yaml.dump(analysis, default_flow_style=False)) else: click.echo(f"Analysis for {command} {version}") click.echo("=" * 40) click.echo(f"Status: {analysis['current_status']}") click.echo(f"Deprecated: {analysis['deprecated_date'] or 'N/A'}") click.echo(f"Removal: {analysis['removal_date'] or 'N/A'}") click.echo(f"Migration guide: {'Available' if analysis['migration_guide_available'] else 'Missing'}") if analysis['breaking_changes']: click.echo(f"\nBreaking changes ({len(analysis['breaking_changes'])}):") for change in analysis['breaking_changes']: click.echo(f" • {change}") if analysis['recommendations']: click.echo(f"\nRecommendations:") for rec in analysis['recommendations']: click.echo(f" • {rec}") else: # Analyze all interfaces candidates = registry.get_deprecation_candidates(days_ahead=30) usage_stats = registry.get_usage_statistics(days=30) analysis = { 'total_interfaces': sum(len(versions) for versions in registry._interfaces.values()), 'deprecation_candidates': len(candidates), 'recent_usage': usage_stats['total_usage'], 'cleanup_opportunities': 0, 'migration_guides_needed': 0 } # Count missing migration guides and cleanup opportunities for command_versions in registry._interfaces.values(): for interface in command_versions.values(): if not interface.migration_guide and interface.status in [LegacyStatus.LEGACY, LegacyStatus.SUNSET]: analysis['migration_guides_needed'] += 1 if interface.status == LegacyStatus.SUNSET: analysis['cleanup_opportunities'] += 1 if format == 'json': click.echo(json.dumps(analysis, indent=2)) elif format == 'yaml': import yaml click.echo(yaml.dump(analysis, default_flow_style=False)) else: click.echo("Legacy Interface Analysis") click.echo("=" * 30) click.echo(f"Total interfaces: {analysis['total_interfaces']}") click.echo(f"Deprecation candidates: {analysis['deprecation_candidates']}") click.echo(f"Recent usage events: {analysis['recent_usage']}") click.echo(f"Migration guides needed: {analysis['migration_guides_needed']}") click.echo(f"Cleanup opportunities: {analysis['cleanup_opportunities']}") if candidates: click.echo(f"\nUpcoming removals:") for candidate in candidates[:5]: # Show first 5 click.echo(f" • {candidate.command} {candidate.version} (removal: {candidate.removal_date})") except Exception as e: click.echo(f"Error analyzing legacy interfaces: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @legacy_management.command('migrate') @click.argument('command') @click.argument('version') @click.option('--to-version', default='current', help='Target version for migration') @pass_config def legacy_migrate(config, command, version, to_version): """ Get migration guidance for a legacy version. Provides detailed migration instructions and breaking change information for upgrading from a legacy interface version to current or another version. Examples: markitect legacy migrate query v1.0 markitect legacy migrate query v1.0 --to-version v2.0 """ try: registry = LegacyRegistry() interface = registry.get_legacy_interface(command, version) if not interface: click.echo(f"Legacy version {command} {version} not found", err=True) sys.exit(1) migration = registry.get_migration_path(command, version, to_version) click.echo(f"Migration Guide: {command} {version} → {to_version}") click.echo("=" * 60) if interface.migration_guide: click.echo(interface.migration_guide) else: click.echo("No specific migration guide available.") click.echo("Consider generating one with: markitect legacy generate-guide") if migration['breaking_changes']: click.echo("\nBreaking Changes:") for i, change in enumerate(migration['breaking_changes'], 1): click.echo(f"{i}. {change}") if migration['steps']: click.echo("\nMigration Steps:") for i, step in enumerate(migration['steps'], 1): click.echo(f"{i}. {step}") # Show additional context click.echo(f"\nInterface Details:") click.echo(f" Current status: {interface.status.value}") if interface.deprecated_date: click.echo(f" Deprecated: {interface.deprecated_date}") if interface.removal_date: click.echo(f" Removal scheduled: {interface.removal_date}") except Exception as e: click.echo(f"Error getting migration guide: {e}", err=True) sys.exit(1) @legacy_management.command('cleanup') @click.argument('command') @click.argument('version') @click.option('--force', is_flag=True, help='Force cleanup without confirmation') @click.option('--backup', is_flag=True, default=True, help='Create backup before cleanup') @pass_config def legacy_cleanup(config, command, version, force, backup): """ Clean up a specific legacy version. Permanently removes a legacy interface from the registry and optionally creates a backup for restoration if needed. Examples: markitect legacy cleanup query v1.0 markitect legacy cleanup query v1.0 --force markitect legacy cleanup query v1.0 --no-backup """ try: agent = LegacyAgent() if not force: interface = agent.registry.get_legacy_interface(command, version) if interface: click.echo(f"About to clean up {command} {version}") click.echo(f"Status: {interface.status.value}") if interface.removal_date: click.echo(f"Scheduled removal: {interface.removal_date}") if interface.status not in [LegacyStatus.SUNSET, LegacyStatus.REMOVED]: click.echo("Warning: Interface is not in SUNSET status") if not click.confirm("Are you sure you want to proceed?"): click.echo("Cleanup cancelled.") return # Configure backup behavior original_backup_config = agent.config.backup_before_cleanup agent.config.backup_before_cleanup = backup success = agent.force_cleanup(command, version) # Restore original config agent.config.backup_before_cleanup = original_backup_config if success: click.echo(f"āœ… Successfully cleaned up {command} {version}") if backup: click.echo("šŸ“¦ Backup created in agent data directory") else: click.echo(f"āŒ Failed to clean up {command} {version}", err=True) sys.exit(1) except Exception as e: click.echo(f"Error during cleanup: {e}", err=True) sys.exit(1) @legacy_management.command('agent-run') @click.option('--dry-run', is_flag=True, help='Show what would be done without executing') @pass_config def legacy_agent_run(config, dry_run): """ Run legacy agent maintenance cycle. Executes automated maintenance including deprecation progression, cleanup scheduling, migration guide generation, and user notifications. Examples: markitect legacy agent-run markitect legacy agent-run --dry-run """ try: agent = LegacyAgent() if dry_run: click.echo("DRY RUN: Legacy agent maintenance preview") click.echo("=" * 50) # Show what would be done agent_config = AgentConfig( auto_progression=False, # Disable actual changes cleanup_unused_days=agent.config.cleanup_unused_days, migration_guide_auto_generation=False, notification_threshold_days=agent.config.notification_threshold_days, max_concurrent_migrations=agent.config.max_concurrent_migrations, backup_before_cleanup=agent.config.backup_before_cleanup ) # Create a preview agent preview_agent = LegacyAgent(config=agent_config) # Analyze what would be done preview_agent._analyze_legacy_interfaces() pending_tasks = [task for task in preview_agent._tasks if not task.completed] if pending_tasks: click.echo(f"Would schedule {len(pending_tasks)} tasks:") for task in pending_tasks: click.echo(f" • {task.action.value}: {task.command}:{task.version}") else: click.echo("No maintenance tasks needed") else: click.echo("Running legacy agent maintenance...") summary = agent.run_maintenance() click.echo("Maintenance Summary") click.echo("=" * 20) click.echo(f"Tasks executed: {summary['tasks_executed']}") click.echo(f"Progressions: {summary['progressions']}") click.echo(f"Cleanups: {summary['cleanups']}") click.echo(f"Notifications: {summary['notifications']}") if summary['errors']: click.echo(f"\nErrors ({len(summary['errors'])}):") for error in summary['errors']: click.echo(f" • {error}") click.echo(f"\nStarted: {summary['started_at']}") click.echo(f"Completed: {summary['completed_at']}") except Exception as e: click.echo(f"Error running agent maintenance: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @legacy_management.command('agent-status') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format') @pass_config def legacy_agent_status(config, format): """ Show legacy agent status and statistics. Displays comprehensive information about the legacy agent including task queue status, configuration, and registry statistics. Examples: markitect legacy agent-status markitect legacy agent-status --format json """ try: agent = LegacyAgent() status = agent.get_agent_status() if format == 'json': click.echo(json.dumps(status, indent=2)) elif format == 'yaml': import yaml click.echo(yaml.dump(status, default_flow_style=False)) else: click.echo("Legacy Agent Status") click.echo("=" * 30) click.echo(f"Data Directory: {status['data_directory']}") click.echo(f"Auto Progression: {'Enabled' if status['config']['auto_progression'] else 'Disabled'}") click.echo(f"Cleanup After: {status['config']['cleanup_unused_days']} days") click.echo(f"\nTask Queue:") click.echo(f" Total: {status['tasks']['total']}") click.echo(f" Pending: {status['tasks']['pending']}") click.echo(f" Completed: {status['tasks']['completed']}") if status['next_maintenance']: click.echo(f"\nNext Maintenance: {status['next_maintenance']}") click.echo(f"\nRegistry Statistics:") for stat_name, stat_value in status['registry_stats'].items(): if stat_name == 'commands': click.echo(f" Commands: {', '.join(stat_value) if stat_value else 'none'}") else: click.echo(f" {stat_name}: {stat_value}") except Exception as e: click.echo(f"Error getting agent status: {e}", err=True) sys.exit(1) @legacy_management.command('usage-stats') @click.option('--command', help='Filter by specific command') @click.option('--days', type=int, default=30, help='Number of days to analyze') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format') @pass_config def legacy_usage_stats(config, command, days, format): """ Show usage statistics for legacy interfaces. Displays usage patterns to help make informed decisions about deprecation timelines and cleanup priorities. Examples: markitect legacy usage-stats markitect legacy usage-stats --command query markitect legacy usage-stats --days 90 --format json """ try: registry = LegacyRegistry() stats = registry.get_usage_statistics(command=command, days=days) if format == 'json': click.echo(json.dumps(stats, indent=2)) elif format == 'yaml': import yaml click.echo(yaml.dump(stats, default_flow_style=False)) else: click.echo(f"Legacy Interface Usage ({days} days)") click.echo("=" * 40) click.echo(f"Total usage events: {stats['total_usage']}") if stats['by_command']: click.echo(f"\nBy Command:") for cmd, versions in stats['by_command'].items(): total_cmd_usage = sum(v['usage_count'] for v in versions.values()) click.echo(f" {cmd}: {total_cmd_usage} uses") for version, data in versions.items(): click.echo(f" {version}: {data['usage_count']} (last: {data['last_used'][:10]})") if stats['by_version']: click.echo(f"\nMost Used Versions:") sorted_versions = sorted(stats['by_version'].items(), key=lambda x: x[1], reverse=True) for version_key, count in sorted_versions[:10]: click.echo(f" {version_key}: {count} uses") if config.get('verbose'): click.echo(f"\nAnalysis period: {days} days", err=True) if command: click.echo(f"Filtered to command: {command}", err=True) except Exception as e: click.echo(f"Error getting usage statistics: {e}", err=True) sys.exit(1) @legacy_management.command('generate-guide') @click.argument('command') @click.argument('version') @click.option('--output', '-o', type=click.Path(), help='Output file (default: stdout)') @pass_config def legacy_generate_guide(config, command, version, output): """ Generate migration guide for a legacy interface. Creates detailed migration documentation for upgrading from a legacy interface version to the current implementation. Examples: markitect legacy generate-guide query v1.0 markitect legacy generate-guide query v1.0 --output migration_guide.md """ try: registry = LegacyRegistry() interface = registry.get_legacy_interface(command, version) if not interface: click.echo(f"Legacy interface {command} {version} not found", err=True) sys.exit(1) # Generate guide content guide_content = f"""# Migration Guide: {command} {version} → Current ## Overview This guide helps you migrate from the legacy `{command}` {version} interface to the current implementation. **Status**: {interface.status.value} **Deprecated**: {interface.deprecated_date or 'Not specified'} **Removal Date**: {interface.removal_date or 'Not scheduled'} ## Breaking Changes """ if interface.breaking_changes: for i, change in enumerate(interface.breaking_changes, 1): guide_content += f"{i}. {change}\n" else: guide_content += "No specific breaking changes documented.\n" guide_content += f""" ## Migration Steps 1. **Remove the legacy flag**: Stop using `--legacy-{version.replace('.', '-')}` 2. **Update command syntax**: Review the current command documentation 3. **Test thoroughly**: Verify that your use cases work with the new interface 4. **Update automation**: Modify any scripts or tools that use the legacy interface ## Getting Help - Run: `markitect help {command}` - Check the documentation for current syntax - Review the changelog for detailed changes ## Example Migration ```bash # Old (legacy {version}) markitect {command} --legacy-{version.replace('.', '-')} [arguments] # New (current) markitect {command} [arguments] ``` For specific parameter changes, refer to the breaking changes section above. """ if interface.migration_guide: guide_content += f"\n## Additional Notes\n\n{interface.migration_guide}\n" # Output if output: with open(output, 'w', encoding='utf-8') as f: f.write(guide_content) click.echo(f"āœ… Migration guide written to: {output}") else: click.echo(guide_content) # Update interface with generated guide if it didn't have one if not interface.migration_guide: interface.migration_guide = guide_content # Note: In a full implementation, this would save back to registry except Exception as e: click.echo(f"Error generating migration guide: {e}", err=True) sys.exit(1) def main(): """ Main entry point for the CLI. This function is referenced in pyproject.toml console_scripts. """ try: cli(standalone_mode=False) except KeyboardInterrupt: click.echo("\nOperation interrupted by user.", err=True) sys.exit(130) # Standard exit code for SIGINT except Exception as e: click.echo(f"Unexpected error: {e}", err=True) sys.exit(1) @cli.command('config-stats') @click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml', 'simple']), default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format') @pass_config def config_stats(config, format): """ Display configuration statistics and status information. Shows comprehensive configuration information including current settings, file sources, validation status, and workspace information. Part of the config subsystem following the *-stats command convention. Examples: markitect config-stats markitect config-stats --format json markitect config-stats --format yaml """ try: # Try to import the config system try: import sys from pathlib import Path # Add the CLI commands directory to path cli_path = Path(__file__).parent.parent / "cli" / "commands" if cli_path.exists(): sys.path.insert(0, str(cli_path.parent)) from commands.config import ConfigCommands # Use the existing config commands system config_commands = ConfigCommands() config_commands.show_config(show_sensitive=False) return except ImportError: pass # Fallback: Simple config stats if full system isn't available config_info = { 'config_file': config.get('config_file', 'None specified'), 'database_path': config.get('database_path', 'Default location'), 'verbose_mode': config.get('verbose', False), 'working_directory': os.getcwd() } # Add environment variables relevant to config env_vars = {} config_env_vars = ['MARKITECT_CONFIG', 'MARKITECT_DATABASE', 'MARKITECT_MODE'] for var in config_env_vars: value = os.getenv(var) env_vars[var] = value if value else 'Not set' config_info['environment_variables'] = env_vars # Format output according to requested format if format == 'json': click.echo(json.dumps(config_info, indent=2)) elif format == 'yaml': click.echo(yaml.dump(config_info, default_flow_style=False)) elif format == 'simple': for key, value in config_info.items(): if key == 'environment_variables': click.echo(f"{key}:") for env_key, env_value in value.items(): click.echo(f" {env_key}: {env_value}") else: click.echo(f"{key}: {value}") else: # table format click.echo("šŸ“Š Configuration Statistics") click.echo("=" * 50) # Basic config click.echo("\nšŸ”§ Basic Configuration:") for key, value in config_info.items(): if key != 'environment_variables': click.echo(f" {key.replace('_', ' ').title()}: {value}") # Environment variables click.echo("\nšŸŒ Environment Variables:") for env_key, env_value in config_info['environment_variables'].items(): status_icon = "āœ…" if env_value != 'Not set' else "āŒ" click.echo(f" {status_icon} {env_key}: {env_value}") # Basic validation click.echo("\nāœ… Basic Validation:") if config.get('database_path'): db_path = Path(config['database_path']) db_exists = db_path.exists() if db_path.is_absolute() else False status = "āœ…" if db_exists else "āš ļø" click.echo(f" {status} Database accessible: {db_exists}") click.echo(f" āœ… Working directory accessible: {os.access(os.getcwd(), os.R_OK)}") except Exception as e: click.echo(f"Error getting configuration statistics: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # Content Commands (Issue #38) from markitect_content.commands import content_get, content_stats # Register content commands cli.add_command(content_get) cli.add_command(content_stats) # Frontmatter Commands (Issue #38 - Cycle 2) from .matter_frontmatter.commands import frontmatter_get, frontmatter_set, frontmatter_keys, frontmatter_stats # Register frontmatter commands cli.add_command(frontmatter_get) cli.add_command(frontmatter_set) cli.add_command(frontmatter_keys) cli.add_command(frontmatter_stats) # Contentmatter Commands (Issue #38 - Cycle 3) from .matter_contentmatter.commands import contentmatter_get, contentmatter_set, contentmatter_keys, contentmatter_stats # Register contentmatter commands cli.add_command(contentmatter_get) cli.add_command(contentmatter_set) cli.add_command(contentmatter_keys) cli.add_command(contentmatter_stats) # Tailmatter Commands (Issue #38 - Cycles 4-5) from .matter_tailmatter.commands import tailmatter_get, tailmatter_set, tailmatter_keys, tailmatter_stats, tailmatter_check # Register tailmatter commands cli.add_command(tailmatter_get) cli.add_command(tailmatter_set) cli.add_command(tailmatter_keys) cli.add_command(tailmatter_stats) cli.add_command(tailmatter_check) # Template Rendering Command (Issue #65) @cli.command(name='template-render') @click.argument('template_file', type=click.Path(exists=True)) @click.argument('data_file', type=click.Path(exists=True)) @click.option('--output', '-o', type=click.Path(), help='Output file path (default: stdout)') @click.option('--strict', is_flag=True, default=True, help='Strict mode: fail on missing variables (default: True)') @click.option('--lenient', is_flag=True, help='Lenient mode: preserve placeholders for missing variables') @click.option('--validate', is_flag=True, help='Validate template syntax before rendering') @click.option('--check-data', is_flag=True, help='Check data completeness before rendering') @click.option('--format', 'data_format', type=click.Choice(['json', 'yaml', 'auto']), default='auto', help='Data file format') @pass_config def template_render(config, template_file, data_file, output, strict, lenient, validate, check_data, data_format): """ Render a template with data to generate documents. This command takes a template file containing variables in {{variable}} format and a data file (JSON or YAML) containing the values to substitute. Examples: markitect template-render invoice.md data.json markitect template-render report.md data.yaml --output report.pdf markitect template-render template.md data.json --lenient --validate """ try: from .template.engine import TemplateEngine # Initialize template engine engine = TemplateEngine() # Read template file with open(template_file, 'r', encoding='utf-8') as f: template_content = f.read() # Determine data format if data_format == 'auto': if data_file.endswith('.json'): data_format = 'json' elif data_file.endswith('.yaml') or data_file.endswith('.yml'): data_format = 'yaml' else: data_format = 'json' # Default to JSON # Read data file with open(data_file, 'r', encoding='utf-8') as f: if data_format == 'json': data = json.load(f) else: # yaml data = yaml.safe_load(f) # Validate template if requested if validate: errors = engine.validate_template(template_content) if errors: click.echo("Template validation errors:", err=True) for error in errors: click.echo(f" - {error}", err=True) sys.exit(1) # Check data completeness if requested if check_data: completeness = engine.check_data_completeness(template_content, data) if completeness['missing']: click.echo("Missing variables in data:", err=True) for var in completeness['missing']: click.echo(f" - {var}", err=True) click.echo(f"Data completeness: {completeness['completeness']:.1%}", err=True) if strict: sys.exit(1) # Determine render mode render_strict = strict and not lenient # Render template try: result = engine.render(template_content, data, strict=render_strict) # Output result if output: with open(output, 'w', encoding='utf-8') as f: f.write(result) click.echo(f"Template rendered successfully to {output}") else: click.echo(result) except Exception as e: click.echo(f"Rendering failed: {e}", err=True) sys.exit(1) except ImportError: click.echo("Template engine not available. Make sure it's properly installed.", err=True) sys.exit(1) except FileNotFoundError as e: click.echo(f"File not found: {e}", err=True) sys.exit(1) except json.JSONDecodeError as e: click.echo(f"JSON parsing error: {e}", err=True) sys.exit(1) except yaml.YAMLError as e: click.echo(f"YAML parsing error: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"Unexpected error: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # Performance Validation Commands (Issue #16) @cli.command(name='perf-benchmark') @click.option('--operations', '-n', type=int, default=1000, help='Number of operations to benchmark') @click.option('--test-type', type=click.Choice(['ingest', 'query', 'template', 'all']), default='all', help='Type of performance test') @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'simple']), default='table', help='Output format') @click.option('--output', '-o', type=click.Path(), help='Output file for results') @pass_config def perf_benchmark(config, operations, test_type, output_format, output): """Run performance benchmarks and measure system performance. Execute performance benchmarks to measure MarkiTect's performance across different operations including document ingestion, querying, and template rendering. Examples: markitect perf-benchmark --operations 500 --test-type ingest markitect perf-benchmark --test-type template --format json -o results.json markitect perf-benchmark --operations 2000 --test-type all """ try: import time import tempfile import json as json_lib from pathlib import Path results = {} start_total = time.time() if test_type in ['ingest', 'all']: # Benchmark document ingestion click.echo("šŸš€ Running ingestion benchmark...") with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) # Create test documents test_docs = [] for i in range(min(operations, 100)): # Limit to 100 docs for ingestion doc_path = temp_path / f"bench_doc_{i}.md" content = f"# Benchmark Document {i}\n\nThis is test content for performance measurement.\n\n## Details\n\nDocument number: {i}\nContent: {'Lorem ipsum ' * 20}" doc_path.write_text(content) test_docs.append(str(doc_path)) # Benchmark ingestion start_time = time.time() for doc_path in test_docs: from .database import DatabaseManager db = DatabaseManager(config['database_path']) try: # Use internal methods to avoid CLI overhead db.process_file(doc_path) except Exception: pass # Continue benchmarking even if some fail ingest_time = time.time() - start_time ingest_rate = len(test_docs) / ingest_time if ingest_time > 0 else 0 results['ingestion'] = { 'operations': len(test_docs), 'time_seconds': round(ingest_time, 3), 'operations_per_second': round(ingest_rate, 1), 'status': 'completed' } if test_type in ['query', 'all']: # Benchmark database queries click.echo("šŸ” Running query benchmark...") start_time = time.time() query_count = 0 try: from .database import DatabaseManager db = DatabaseManager(config['database_path']) # Run various queries for i in range(min(operations, 500)): # Limit query operations try: # Test different query types if i % 3 == 0: db.list_markdown_files() elif i % 3 == 1: db.list_schema_files() else: db.execute_query("SELECT COUNT(*) FROM markdown_files") query_count += 1 except Exception: pass # Continue benchmarking query_time = time.time() - start_time query_rate = query_count / query_time if query_time > 0 else 0 results['querying'] = { 'operations': query_count, 'time_seconds': round(query_time, 3), 'operations_per_second': round(query_rate, 1), 'status': 'completed' } except Exception as e: results['querying'] = { 'status': 'failed', 'error': str(e) } if test_type in ['template', 'all']: # Benchmark template rendering click.echo("šŸ“„ Running template rendering benchmark...") template_content = "# {{title}}\n\nHello {{user.name}}, welcome to {{company}}!\n\n## Stats\n- Count: {{count}}\n- Value: {{data.value}}" template_data = { "title": "Benchmark Template", "user": {"name": "Test User"}, "company": "MarkiTect", "count": 42, "data": {"value": 3.14159} } start_time = time.time() template_count = 0 try: from .template.engine import TemplateEngine engine = TemplateEngine() for i in range(min(operations, 1000)): # Template operations try: result = engine.render(template_content, template_data) template_count += 1 except Exception: pass # Continue benchmarking template_time = time.time() - start_time template_rate = template_count / template_time if template_time > 0 else 0 results['template_rendering'] = { 'operations': template_count, 'time_seconds': round(template_time, 3), 'operations_per_second': round(template_rate, 1), 'status': 'completed' } except Exception as e: results['template_rendering'] = { 'status': 'failed', 'error': str(e) } total_time = time.time() - start_total results['summary'] = { 'total_time_seconds': round(total_time, 3), 'test_type': test_type, 'requested_operations': operations, 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S') } # Format and output results if output_format == 'json': output_text = json_lib.dumps(results, indent=2) elif output_format == 'simple': output_lines = [f"Performance Benchmark Results - {test_type}"] for category, data in results.items(): if category != 'summary': if data.get('status') == 'completed': output_lines.append(f"{category}: {data['operations']} ops in {data['time_seconds']}s ({data['operations_per_second']} ops/sec)") else: output_lines.append(f"{category}: {data.get('status', 'unknown')}") output_lines.append(f"Total time: {results['summary']['total_time_seconds']}s") output_text = '\n'.join(output_lines) else: # table format from tabulate import tabulate table_data = [] for category, data in results.items(): if category != 'summary': if data.get('status') == 'completed': table_data.append([ category.replace('_', ' ').title(), data['operations'], f"{data['time_seconds']}s", f"{data['operations_per_second']:.1f}", data['status'] ]) else: table_data.append([ category.replace('_', ' ').title(), '-', '-', '-', data.get('status', 'unknown') ]) output_text = tabulate( table_data, headers=['Operation', 'Count', 'Time', 'Ops/Sec', 'Status'], tablefmt='grid' ) output_text += f"\n\nTotal benchmark time: {results['summary']['total_time_seconds']}s" if output: with open(output, 'w') as f: f.write(output_text) click.echo(f"āœ… Benchmark results saved to {output}") else: click.echo(output_text) except ImportError as e: click.echo(f"Missing dependency for benchmarking: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"Benchmark failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='perf-validate') @click.option('--threshold-ops', type=int, default=100, help='Minimum operations per second threshold') @click.option('--threshold-memory', type=int, default=100, help='Maximum memory usage in MB') @click.option('--test-duration', type=int, default=10, help='Test duration in seconds') @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'simple']), default='table', help='Output format') @pass_config def perf_validate(config, threshold_ops, threshold_memory, test_duration, output_format): """Validate system performance against defined thresholds. Run performance validation tests to ensure MarkiTect meets performance requirements for production use. Examples: markitect perf-validate --threshold-ops 200 --threshold-memory 50 markitect perf-validate --test-duration 30 --format json """ try: import time import tempfile import json as json_lib from pathlib import Path validation_results = {} start_time = time.time() # Memory monitoring setup try: import psutil import os process = psutil.Process(os.getpid()) initial_memory_mb = process.memory_info().rss / (1024 * 1024) memory_available = True except ImportError: memory_available = False initial_memory_mb = 0 click.echo(f"šŸ” Starting performance validation (duration: {test_duration}s)...") # Test 1: Template rendering performance click.echo("šŸ“„ Testing template rendering performance...") template_content = "# {{title}}\n\nProcessing {{item.name}} - {{item.value}}" operations_completed = 0 try: from .template.engine import TemplateEngine engine = TemplateEngine() test_start = time.time() while (time.time() - test_start) < (test_duration / 3): # Use 1/3 of time for templates template_data = { "title": f"Test Document {operations_completed}", "item": {"name": f"Item {operations_completed}", "value": operations_completed * 1.5} } result = engine.render(template_content, template_data) operations_completed += 1 template_duration = time.time() - test_start template_rate = operations_completed / template_duration if template_duration > 0 else 0 validation_results['template_rendering'] = { 'operations_completed': operations_completed, 'duration_seconds': round(template_duration, 3), 'operations_per_second': round(template_rate, 1), 'threshold_met': template_rate >= threshold_ops, 'threshold_value': threshold_ops } except Exception as e: validation_results['template_rendering'] = { 'status': 'failed', 'error': str(e), 'threshold_met': False } # Test 2: Database operations performance click.echo("šŸ—„ļø Testing database operations performance...") db_operations = 0 try: from .database import DatabaseManager db = DatabaseManager(config['database_path']) test_start = time.time() while (time.time() - test_start) < (test_duration / 3): # Use 1/3 of time for DB try: # Rotate through different query types if db_operations % 3 == 0: db.list_markdown_files() elif db_operations % 3 == 1: db.list_schema_files() else: db.execute_query("SELECT COUNT(*) FROM markdown_files") db_operations += 1 except Exception: pass # Continue testing db_duration = time.time() - test_start db_rate = db_operations / db_duration if db_duration > 0 else 0 validation_results['database_operations'] = { 'operations_completed': db_operations, 'duration_seconds': round(db_duration, 3), 'operations_per_second': round(db_rate, 1), 'threshold_met': db_rate >= threshold_ops, 'threshold_value': threshold_ops } except Exception as e: validation_results['database_operations'] = { 'status': 'failed', 'error': str(e), 'threshold_met': False } # Test 3: Memory usage validation if memory_available: click.echo("🧠 Testing memory usage...") current_memory_mb = process.memory_info().rss / (1024 * 1024) memory_increase_mb = current_memory_mb - initial_memory_mb validation_results['memory_usage'] = { 'initial_memory_mb': round(initial_memory_mb, 2), 'current_memory_mb': round(current_memory_mb, 2), 'memory_increase_mb': round(memory_increase_mb, 2), 'threshold_met': memory_increase_mb <= threshold_memory, 'threshold_value': threshold_memory } else: validation_results['memory_usage'] = { 'status': 'skipped', 'reason': 'psutil not available', 'threshold_met': True # Assume pass if we can't measure } # Overall validation summary total_duration = time.time() - start_time all_tests_passed = all( result.get('threshold_met', False) for result in validation_results.values() if 'threshold_met' in result ) validation_results['summary'] = { 'total_duration_seconds': round(total_duration, 3), 'validation_passed': all_tests_passed, 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'), 'thresholds': { 'operations_per_second': threshold_ops, 'memory_mb': threshold_memory, 'test_duration': test_duration } } # Format output if output_format == 'json': output_text = json_lib.dumps(validation_results, indent=2) elif output_format == 'simple': output_lines = ["Performance Validation Results"] for test_name, data in validation_results.items(): if test_name != 'summary': if 'threshold_met' in data: status = "āœ… PASS" if data['threshold_met'] else "āŒ FAIL" if 'operations_per_second' in data: output_lines.append(f"{test_name}: {data['operations_per_second']:.1f} ops/sec {status}") elif 'memory_increase_mb' in data: output_lines.append(f"{test_name}: {data['memory_increase_mb']:.2f} MB increase {status}") else: output_lines.append(f"{test_name}: {data.get('status', 'unknown')}") overall_status = "āœ… VALIDATION PASSED" if all_tests_passed else "āŒ VALIDATION FAILED" output_lines.append(f"\nOverall: {overall_status}") output_text = '\n'.join(output_lines) else: # table format from tabulate import tabulate table_data = [] for test_name, data in validation_results.items(): if test_name != 'summary': if 'threshold_met' in data: status = "āœ… PASS" if data['threshold_met'] else "āŒ FAIL" if 'operations_per_second' in data: value = f"{data['operations_per_second']:.1f} ops/sec" threshold = f">= {data['threshold_value']} ops/sec" elif 'memory_increase_mb' in data: value = f"{data['memory_increase_mb']:.2f} MB" threshold = f"<= {data['threshold_value']} MB" else: value = "N/A" threshold = "N/A" table_data.append([ test_name.replace('_', ' ').title(), value, threshold, status ]) else: table_data.append([ test_name.replace('_', ' ').title(), data.get('status', 'unknown'), '-', 'āš ļø SKIP' ]) output_text = tabulate( table_data, headers=['Test', 'Result', 'Threshold', 'Status'], tablefmt='grid' ) overall_status = "āœ… VALIDATION PASSED" if all_tests_passed else "āŒ VALIDATION FAILED" output_text += f"\n\nOverall Result: {overall_status}" output_text += f"\nTotal validation time: {validation_results['summary']['total_duration_seconds']}s" click.echo(output_text) # Exit with appropriate code if not all_tests_passed: sys.exit(1) except Exception as e: click.echo(f"Performance validation failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='perf-monitor') @click.option('--duration', type=int, default=60, help='Monitoring duration in seconds') @click.option('--interval', type=int, default=5, help='Monitoring interval in seconds') @click.option('--output', '-o', type=click.Path(), help='Output file for monitoring data') @click.option('--format', 'output_format', type=click.Choice(['json', 'csv', 'simple']), default='simple', help='Output format') @pass_config def perf_monitor(config, duration, interval, output, output_format): """Monitor system performance over time. Continuously monitor MarkiTect performance metrics including memory usage, cache effectiveness, and database performance. Examples: markitect perf-monitor --duration 300 --interval 10 markitect perf-monitor --duration 60 --format json -o monitoring.json """ try: import time import json as json_lib # Memory monitoring setup try: import psutil import os process = psutil.Process(os.getpid()) memory_available = True except ImportError: memory_available = False click.echo(f"šŸ“Š Starting performance monitoring (duration: {duration}s, interval: {interval}s)...") monitoring_data = [] start_time = time.time() try: while (time.time() - start_time) < duration: measurement_time = time.time() data_point = { 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'), 'elapsed_seconds': round(measurement_time - start_time, 1) } # Memory metrics if memory_available: memory_info = process.memory_info() data_point.update({ 'memory_rss_mb': round(memory_info.rss / (1024 * 1024), 2), 'memory_vms_mb': round(memory_info.vms / (1024 * 1024), 2) }) # System metrics try: from .database import DatabaseManager db = DatabaseManager(config['database_path']) stats = db.get_statistics() data_point.update({ 'database_files': stats.get('file_count', 0), 'database_size_kb': stats.get('db_size_bytes', 0) / 1024 }) except Exception: data_point.update({ 'database_files': 'error', 'database_size_kb': 'error' }) # Cache metrics try: # Get cache info (simplified) cache_dir = Path('.ast_cache') if cache_dir.exists(): cache_files = len(list(cache_dir.glob('*'))) cache_size = sum(f.stat().st_size for f in cache_dir.glob('*') if f.is_file()) data_point.update({ 'cache_files': cache_files, 'cache_size_kb': round(cache_size / 1024, 2) }) else: data_point.update({ 'cache_files': 0, 'cache_size_kb': 0 }) except Exception: data_point.update({ 'cache_files': 'error', 'cache_size_kb': 'error' }) monitoring_data.append(data_point) # Display current status if memory_available: click.echo(f"ā±ļø {data_point['elapsed_seconds']:>6.1f}s | " f"Memory: {data_point['memory_rss_mb']:>6.1f}MB | " f"DB Files: {data_point['database_files']:>4} | " f"Cache: {data_point['cache_files']:>3} files") else: click.echo(f"ā±ļø {data_point['elapsed_seconds']:>6.1f}s | " f"DB Files: {data_point['database_files']:>4} | " f"Cache: {data_point['cache_files']:>3} files") # Wait for next interval time.sleep(interval) except KeyboardInterrupt: click.echo("\nā¹ļø Monitoring stopped by user") # Format output total_duration = time.time() - start_time if output_format == 'json': output_data = { 'monitoring_session': { 'start_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)), 'duration_seconds': round(total_duration, 1), 'interval_seconds': interval, 'data_points': len(monitoring_data) }, 'measurements': monitoring_data } output_text = json_lib.dumps(output_data, indent=2) elif output_format == 'csv': if monitoring_data: headers = list(monitoring_data[0].keys()) lines = [','.join(headers)] for data_point in monitoring_data: values = [str(data_point.get(header, '')) for header in headers] lines.append(','.join(values)) output_text = '\n'.join(lines) else: output_text = "No monitoring data collected" else: # simple format output_lines = [ f"Performance Monitoring Summary", f"Duration: {total_duration:.1f}s", f"Data points: {len(monitoring_data)}", f"Interval: {interval}s" ] if monitoring_data and memory_available: memory_values = [d['memory_rss_mb'] for d in monitoring_data if isinstance(d.get('memory_rss_mb'), (int, float))] if memory_values: output_lines.extend([ f"Memory usage: {min(memory_values):.1f}MB - {max(memory_values):.1f}MB", f"Average memory: {sum(memory_values)/len(memory_values):.1f}MB" ]) output_text = '\n'.join(output_lines) if output: with open(output, 'w') as f: f.write(output_text) click.echo(f"šŸ“ Monitoring data saved to {output}") else: click.echo("\n" + output_text) except Exception as e: click.echo(f"Performance monitoring failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='perf-track') @click.option('--notes', '-n', type=str, default="", help='Optional notes for this performance snapshot') @click.option('--output', '-o', type=click.Path(), help='Save results to file') @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'simple']), default='table', help='Output format') @pass_config def perf_track(config, notes, output, output_format): """Record a performance snapshot and track it over time. Run comprehensive performance benchmarks and store the results in a tracking database for historical analysis and trend monitoring. Examples: markitect perf-track --notes "After optimization changes" markitect perf-track --format json -o perf-snapshot.json markitect perf-track --notes "Baseline before refactor" """ try: import time import tempfile import json as json_lib from pathlib import Path from .performance_tracker import PerformanceTracker # Initialize performance tracker tracker_db = Path(config['database_path']).parent / 'performance_tracking.db' tracker = PerformanceTracker(str(tracker_db)) click.echo("šŸ“Š Running performance benchmark for tracking...") # Run comprehensive benchmarks start_time = time.time() # Template rendering benchmark template_ops = 0 try: from .template.engine import TemplateEngine engine = TemplateEngine() template_content = "# {{title}}\n\nProcessing {{item.name}} - {{item.value}}" test_start = time.time() while (time.time() - test_start) < 2.0: # 2 second test template_data = { "title": f"Benchmark {template_ops}", "item": {"name": f"Item {template_ops}", "value": template_ops * 1.5} } result = engine.render(template_content, template_data) template_ops += 1 template_duration = time.time() - test_start template_rate = template_ops / template_duration if template_duration > 0 else 0 except Exception as e: template_rate = 0 click.echo(f"āš ļø Template benchmark failed: {e}", err=True) # Database operations benchmark database_ops = 0 try: from .database import DatabaseManager db = DatabaseManager(config['database_path']) test_start = time.time() while (time.time() - test_start) < 2.0: # 2 second test try: if database_ops % 3 == 0: db.list_markdown_files() elif database_ops % 3 == 1: db.list_schema_files() else: db.execute_query("SELECT COUNT(*) FROM markdown_files") database_ops += 1 except Exception: pass database_duration = time.time() - test_start database_rate = database_ops / database_duration if database_duration > 0 else 0 except Exception as e: database_rate = 0 click.echo(f"āš ļø Database benchmark failed: {e}", err=True) # Ingestion benchmark (limited to 20 operations for speed) ingestion_ops = 0 try: with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) test_docs = [] for i in range(20): # Limited set for tracking doc_path = temp_path / f"track_doc_{i}.md" content = f"# Track Document {i}\n\nContent for benchmark tracking.\n\n## Details\n\nDocument: {i}" doc_path.write_text(content) test_docs.append(str(doc_path)) test_start = time.time() for doc_path in test_docs: try: db = DatabaseManager(config['database_path']) db.store_markdown_file(doc_path, Path(doc_path).read_text()) ingestion_ops += 1 except Exception: pass ingestion_duration = time.time() - test_start ingestion_rate = ingestion_ops / ingestion_duration if ingestion_duration > 0 else 0 except Exception as e: ingestion_rate = 0 click.echo(f"āš ļø Ingestion benchmark failed: {e}", err=True) # Memory usage measurement memory_mb = 50.0 # Default fallback try: import psutil import os process = psutil.Process(os.getpid()) memory_mb = process.memory_info().rss / (1024 * 1024) except ImportError: pass # Store performance snapshot snapshot_id = tracker.store_performance_snapshot( template_ops=template_rate, database_ops=database_rate, ingestion_ops=ingestion_rate, memory_mb=memory_mb, notes=notes ) total_duration = time.time() - start_time # Get performance summary including the new snapshot summary = tracker.get_performance_summary() # Format output if output_format == 'json': output_data = { "snapshot_id": snapshot_id, "performance_results": { "template_ops_per_sec": round(template_rate, 1), "database_ops_per_sec": round(database_rate, 1), "ingestion_ops_per_sec": round(ingestion_rate, 1), "memory_usage_mb": round(memory_mb, 2), "performance_index": summary["latest_snapshot"]["performance_index"] }, "tracking_summary": summary, "benchmark_duration_seconds": round(total_duration, 3), "timestamp": summary["latest_snapshot"]["timestamp"], "notes": notes } output_text = json_lib.dumps(output_data, indent=2) elif output_format == 'table': # Current performance table perf_data = [ ["Template Rendering", f"{template_rate:.1f} ops/sec"], ["Database Operations", f"{database_rate:.1f} ops/sec"], ["Document Ingestion", f"{ingestion_rate:.1f} ops/sec"], ["Memory Usage", f"{memory_mb:.1f} MB"], ["Performance Index", f"{summary['latest_snapshot']['performance_index']:.1f}/100"] ] output_lines = [ f"šŸ“Š Performance Snapshot #{snapshot_id} Recorded", "", tabulate(perf_data, headers=["Metric", "Value"], tablefmt="grid"), "", f"šŸŽÆ Performance Index: {summary['latest_snapshot']['performance_index']:.1f}/100" ] # Add trend information if available if summary.get("trend_analysis", {}).get("trend") != "insufficient_data": trend = summary["trend_analysis"] trend_emoji = "šŸ“ˆ" if trend["trend"] == "improving" else "šŸ“‰" if trend["trend"] == "degrading" else "šŸ“Š" output_lines.extend([ "", f"{trend_emoji} Trend Analysis (30 days):", f" Direction: {trend['trend'].title()}", f" Change: {trend['trend_change_percent']:+.1f}%", f" Snapshots: {trend['snapshot_count']}" ]) if notes: output_lines.extend(["", f"šŸ“ Notes: {notes}"]) output_text = '\n'.join(output_lines) else: # simple format output_lines = [ f"Performance Index: {summary['latest_snapshot']['performance_index']:.1f}/100", f"Template: {template_rate:.1f} ops/sec", f"Database: {database_rate:.1f} ops/sec", f"Ingestion: {ingestion_rate:.1f} ops/sec", f"Memory: {memory_mb:.1f} MB", f"Snapshot ID: {snapshot_id}" ] if notes: output_lines.append(f"Notes: {notes}") output_text = '\n'.join(output_lines) if output: with open(output, 'w') as f: f.write(output_text) click.echo(f"šŸ“ Performance snapshot saved to {output}") else: click.echo(output_text) except Exception as e: click.echo(f"Performance tracking failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='perf-history') @click.option('--limit', '-l', type=int, default=10, help='Number of recent snapshots to show') @click.option('--trend-days', type=int, default=30, help='Days to analyze for trend') @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'simple']), default='table', help='Output format') @click.option('--output', '-o', type=click.Path(), help='Save results to file') @pass_config def perf_history(config, limit, trend_days, output_format, output): """Show performance history and trend analysis. Display historical performance data with trend analysis to track system performance evolution over time. Examples: markitect perf-history --limit 20 markitect perf-history --trend-days 7 --format json markitect perf-history --format table -o performance-report.txt """ try: import json as json_lib from pathlib import Path from .performance_tracker import PerformanceTracker # Initialize performance tracker tracker_db = Path(config['database_path']).parent / 'performance_tracking.db' tracker = PerformanceTracker(str(tracker_db)) # Get performance data history = tracker.get_performance_history(limit=limit) summary = tracker.get_performance_summary() trend_analysis = tracker.analyze_performance_trend(days=trend_days) if not history: click.echo("šŸ“Š No performance data available. Run 'markitect perf-track' to create a baseline.") return # Format output if output_format == 'json': output_data = { "performance_summary": summary, "trend_analysis": trend_analysis, "history": [ { "timestamp": snapshot.timestamp, "performance_index": snapshot.performance_index, "git_commit": snapshot.git_commit, "template_ops_per_sec": snapshot.template_ops_per_sec, "database_ops_per_sec": snapshot.database_ops_per_sec, "ingestion_ops_per_sec": snapshot.ingestion_ops_per_sec, "memory_usage_mb": snapshot.memory_usage_mb, "notes": snapshot.notes } for snapshot in history ], "analysis_parameters": { "history_limit": limit, "trend_analysis_days": trend_days } } output_text = json_lib.dumps(output_data, indent=2) elif output_format == 'table': # Summary section latest = summary["latest_snapshot"] output_lines = [ "šŸ“Š MarkiTect Performance History & Analysis", "=" * 50, "", f"šŸŽÆ Current Performance Index: {latest['performance_index']:.1f}/100" ] # Trend analysis if trend_analysis.get("trend") != "insufficient_data": trend_emoji = "šŸ“ˆ" if trend_analysis["trend"] == "improving" else "šŸ“‰" if trend_analysis["trend"] == "degrading" else "šŸ“Š" output_lines.extend([ f"{trend_emoji} Trend ({trend_days} days): {trend_analysis['trend'].title()}", f" Change: {trend_analysis['trend_change_percent']:+.1f}% ({trend_analysis['trend_change_points']:+.2f} points)", f" Range: {trend_analysis['period_min']:.1f} - {trend_analysis['period_max']:.1f}", f" Average: {trend_analysis['period_avg']:.1f}", "" ]) # History table history_data = [] for i, snapshot in enumerate(history): timestamp_short = snapshot.timestamp.split('T')[0] + ' ' + snapshot.timestamp.split('T')[1][:8] commit_short = snapshot.git_commit[:8] if snapshot.git_commit else "unknown" history_data.append([ len(history) - i, # Reverse numbering (newest first) timestamp_short, f"{snapshot.performance_index:.1f}", commit_short, f"{snapshot.template_ops_per_sec:.0f}", f"{snapshot.database_ops_per_sec:.0f}", f"{snapshot.memory_usage_mb:.1f}", snapshot.notes[:20] + "..." if len(snapshot.notes) > 20 else snapshot.notes ]) output_lines.extend([ "šŸ“ˆ Recent Performance History:", "", tabulate(history_data, headers=["#", "Timestamp", "Index", "Commit", "Template", "Database", "Memory", "Notes"], tablefmt="grid") ]) output_text = '\n'.join(output_lines) else: # simple format latest = summary["latest_snapshot"] output_lines = [ f"Current Performance Index: {latest['performance_index']:.1f}/100" ] if trend_analysis.get("trend") != "insufficient_data": output_lines.append(f"Trend ({trend_days}d): {trend_analysis['trend']} ({trend_analysis['trend_change_percent']:+.1f}%)") output_lines.append(f"History entries: {len(history)}") # Show last few snapshots for i, snapshot in enumerate(history[:5]): date_part = snapshot.timestamp.split('T')[0] output_lines.append(f" {date_part}: {snapshot.performance_index:.1f}") output_text = '\n'.join(output_lines) if output: with open(output, 'w') as f: f.write(output_text) click.echo(f"šŸ“ Performance history saved to {output}") else: click.echo(output_text) except Exception as e: click.echo(f"Performance history retrieval failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # Batch Processing Commands - Issue #17 @cli.command(name='ingest-dir') @click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path)) @click.option('--recursive', '-r', is_flag=True, help='Process directories recursively') @click.option('--depth', type=int, help='Maximum depth for recursive processing') @click.option('--pattern', default='*.md', help='File pattern to match (default: *.md)') @click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']), default='continue', help='Error handling strategy') @click.option('--quiet', '-q', is_flag=True, help='Suppress progress output') @pass_config def ingest_dir(config, directory, recursive, depth, pattern, error_handling, quiet): """Process all Markdown files in directory. Ingests all markdown files found in the specified directory into the database. Supports recursive processing with depth control and flexible error handling. Examples: markitect ingest-dir ./docs markitect ingest-dir ./content --recursive --depth 3 markitect ingest-dir ./articles --pattern "*.markdown" --error-handling stop """ try: # Convert error handling string to enum error_strategy = ErrorHandling[error_handling.upper()] # Initialize batch processor processor = BatchProcessor( error_handling=error_strategy, show_progress=not quiet, max_depth=depth ) # Find files to process if not quiet: click.echo(f"šŸ” Searching for files in {directory}...") files = processor.find_markdown_files( directory=directory, pattern=pattern, recursive=recursive, depth=depth ) if not files: click.echo(f"šŸ“­ No files found matching pattern '{pattern}' in {directory}") return # Create file processor for ingestion file_processor = create_file_processor(config, ProcessingMode.INGEST) # Process files result = processor.process_files(files, file_processor, "Ingesting") # Exit with error code if there were failures if result.failed > 0 and error_strategy == ErrorHandling.STOP: sys.exit(1) except Exception as e: click.echo(f"Directory ingestion failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='batch-process') @click.argument('pattern', type=str) @click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']), default='ingest', help='Operation to perform on matched files') @click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']), default='continue', help='Error handling strategy') @click.option('--quiet', '-q', is_flag=True, help='Suppress progress output') @pass_config def batch_process(config, pattern, operation, error_handling, quiet): """Process files matching glob pattern. Uses glob patterns to find and process files. Supports various operations including ingestion, status checking, and validation. Examples: markitect batch-process "**/*.md" --operation ingest markitect batch-process "docs/**/*.markdown" --operation status markitect batch-process "./content/*.md" --operation validate --error-handling stop """ try: # Convert strings to enums error_strategy = ErrorHandling[error_handling.upper()] processing_mode = ProcessingMode[operation.upper()] # Initialize batch processor processor = BatchProcessor( error_handling=error_strategy, show_progress=not quiet ) # Find files using glob pattern if not quiet: click.echo(f"šŸ” Searching for files matching '{pattern}'...") files = processor.find_files_by_glob(pattern) if not files: click.echo(f"šŸ“­ No files found matching pattern '{pattern}'") return # Create file processor for the specified operation file_processor = create_file_processor(config, processing_mode) # Process files operation_name = f"{operation.title()}ing" result = processor.process_files(files, file_processor, operation_name) # Exit with error code if there were failures if result.failed > 0 and error_strategy == ErrorHandling.STOP: sys.exit(1) except Exception as e: click.echo(f"Batch processing failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='recursive') @click.argument('directory', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path)) @click.option('--depth', type=int, default=None, help='Maximum recursion depth') @click.option('--operation', type=click.Choice(['ingest', 'status', 'validate']), default='status', help='Operation to perform') @click.option('--pattern', default='*.md', help='File pattern to match (default: *.md)') @click.option('--error-handling', type=click.Choice(['stop', 'continue', 'skip']), default='continue', help='Error handling strategy') @click.option('--quiet', '-q', is_flag=True, help='Suppress progress output') @pass_config def recursive(config, directory, depth, operation, pattern, error_handling, quiet): """Recursive processing with depth control. Performs recursive operations on directory trees with configurable depth limits. This command provides fine-grained control over recursive processing behavior. Examples: markitect recursive ./docs --depth 2 --operation ingest markitect recursive ./content --depth 5 --operation status --pattern "*.markdown" markitect recursive ./src --operation validate --error-handling stop """ try: # Convert strings to enums error_strategy = ErrorHandling[error_handling.upper()] processing_mode = ProcessingMode[operation.upper()] # Initialize batch processor with depth control processor = BatchProcessor( error_handling=error_strategy, show_progress=not quiet, max_depth=depth ) # Find files recursively if not quiet: depth_str = f" (max depth: {depth})" if depth else "" click.echo(f"šŸ” Recursively searching {directory}{depth_str}...") files = processor.find_markdown_files( directory=directory, pattern=pattern, recursive=True, depth=depth ) if not files: click.echo(f"šŸ“­ No files found matching pattern '{pattern}' in {directory}") return # Create file processor for the specified operation file_processor = create_file_processor(config, processing_mode) # Process files operation_name = f"Recursively {operation}ing" result = processor.process_files(files, file_processor, operation_name) # Exit with error code if there were failures if result.failed > 0 and error_strategy == ErrorHandling.STOP: sys.exit(1) except Exception as e: click.echo(f"Recursive processing failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # Configuration Management Commands - Issue #18 @cli.command(name='config-show') @click.option('--format', 'output_format', type=click.Choice(['yaml', 'json', 'simple']), default='yaml', help='Output format for configuration display') @click.option('--show-sensitive', is_flag=True, help='Show sensitive values (tokens, passwords)') @pass_config def config_show(config, output_format, show_sensitive): """Display current configuration. Shows comprehensive configuration information including current settings, file sources, environment variables, and workspace information. Examples: markitect config-show markitect config-show --format json markitect config-show --format simple --show-sensitive """ try: config_manager = ConfigurationManager() output = config_manager.display_config( show_sensitive=show_sensitive, output_format=output_format ) click.echo(output) except Exception as e: click.echo(f"Failed to display configuration: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='config-set') @click.argument('key', type=str) @click.argument('value', type=str) @click.option('--config-file', type=click.Path(), help='Target configuration file') @click.option('--validate/--no-validate', default=True, help='Validate configuration after setting') @pass_config def config_set(config, key, value, config_file, validate): """Set configuration values. Sets a configuration value and persists it to a configuration file. Supports nested keys using dot notation (e.g., 'gitea.url'). Examples: markitect config-set gitea_url http://localhost:3000 markitect config-set repo_owner myorganization markitect config-set api_token abc123def456 markitect config-set workspace.dir ./my_workspace """ try: config_manager = ConfigurationManager() # Set the configuration value success = config_manager.set_config_value(key, value, config_file) if success: click.echo(f"āœ… Configuration updated: {key} = {value}") # Show which file was updated target_file = config_manager._get_target_config_file(config_file) click.echo(f"šŸ“ Updated file: {target_file}") # Validate configuration if requested if validate: validation_results = config_manager.validate_configuration() errors = [r for r in validation_results if r['status'] == 'error'] if errors: click.echo("āš ļø Configuration validation warnings:") for error in errors: click.echo(f" • {error['key']}: {error['message']}") else: click.echo(f"āŒ Failed to set configuration: {key}", err=True) sys.exit(1) except ValueError as e: click.echo(f"āŒ Configuration error: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to set configuration: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='config-init') @click.option('--project-dir', type=click.Path(path_type=Path), help='Target project directory') @click.option('--interactive/--no-interactive', default=True, help='Interactive configuration setup') @click.option('--force', is_flag=True, help='Overwrite existing configuration') @pass_config def config_init(config, project_dir, interactive, force): """Initialize configuration for new project. Creates a new configuration file with sensible defaults and sets up the necessary directory structure for a MarkiTect project. Examples: markitect config-init markitect config-init --project-dir ./my-project markitect config-init --no-interactive --force """ try: target_dir = project_dir or Path.cwd() config_file = target_dir / '.markitect.yml' # Check if configuration already exists if config_file.exists() and not force: click.echo(f"āŒ Configuration file already exists: {config_file}") click.echo(" Use --force to overwrite or choose a different directory") sys.exit(1) config_manager = ConfigurationManager() # Interactive setup if requested initial_config = { 'gitea_url': 'http://localhost:3000', 'repo_owner': '', 'repo_name': target_dir.name, 'workspace_dir': '.markitect_workspace', 'cache_dir': '.ast_cache', 'tests_dir': 'tests', 'test_file_pattern': 'test_issue_{issue_num}_{scenario}.py', 'claude_code_command': 'claude' } if interactive: click.echo("šŸ”§ Interactive MarkiTect configuration setup") click.echo(f"šŸ“ Target directory: {target_dir}") click.echo() # Prompt for each configuration value initial_config['gitea_url'] = click.prompt( 'Gitea server URL', default=initial_config['gitea_url'] ) initial_config['repo_owner'] = click.prompt( 'Repository owner/organization', default=initial_config['repo_owner'] ) initial_config['repo_name'] = click.prompt( 'Repository name', default=initial_config['repo_name'] ) if click.confirm('Configure API token now?', default=False): initial_config['api_token'] = click.prompt( 'API token', default='', hide_input=True ) initial_config['workspace_dir'] = click.prompt( 'Workspace directory', default=initial_config['workspace_dir'] ) initial_config['tests_dir'] = click.prompt( 'Tests directory', default=initial_config['tests_dir'] ) # Initialize the project result = config_manager.initialize_project_config(target_dir, interactive=False) # Update with interactive values if provided if interactive: config_manager._save_config_file(initial_config, config_file) result['config'] = initial_config click.echo("āœ… MarkiTect project initialized successfully!") click.echo(f"šŸ“„ Configuration file: {result['config_file']}") click.echo("šŸ“ Created directories:") for directory in result['created_directories']: click.echo(f" • {directory}") # Show validation results validation_results = config_manager.validate_configuration(result['config']) warnings = [r for r in validation_results if r['status'] == 'warning'] errors = [r for r in validation_results if r['status'] == 'error'] if warnings: click.echo("āš ļø Configuration warnings:") for warning in warnings: click.echo(f" • {warning['message']}") if errors: click.echo("āŒ Configuration errors:") for error in errors: click.echo(f" • {error['message']}") else: click.echo("šŸŽ‰ Configuration validation passed!") except Exception as e: click.echo(f"āŒ Failed to initialize configuration: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='config-validate') @click.option('--verbose', '-v', is_flag=True, help='Show detailed validation information') @pass_config def config_validate(config, verbose): """Validate current configuration. Checks the current configuration for common issues, missing required fields, and validates paths and URLs. Provides suggestions for fixing any problems. Examples: markitect config-validate markitect config-validate --verbose """ try: config_manager = ConfigurationManager() validation_results = config_manager.validate_configuration() # Categorize results errors = [r for r in validation_results if r['status'] == 'error'] warnings = [r for r in validation_results if r['status'] == 'warning'] ok_results = [r for r in validation_results if r['status'] == 'ok'] # Display summary click.echo(f"šŸ“Š Configuration Validation Summary:") click.echo(f" āœ… OK: {len(ok_results)}") click.echo(f" āš ļø Warnings: {len(warnings)}") click.echo(f" āŒ Errors: {len(errors)}") click.echo() # Show errors if errors: click.echo("āŒ Configuration Errors:") for error in errors: click.echo(f" • {error['key']}: {error['message']}") click.echo() # Show warnings if warnings: click.echo("āš ļø Configuration Warnings:") for warning in warnings: click.echo(f" • {warning['key']}: {warning['message']}") click.echo() # Show OK results in verbose mode if verbose and ok_results: click.echo("āœ… Valid Configuration:") for ok_result in ok_results: click.echo(f" • {ok_result['key']}: {ok_result['message']}") click.echo() # Overall status if errors: click.echo("āŒ Configuration validation failed") sys.exit(1) elif warnings: click.echo("āš ļø Configuration validation passed with warnings") else: click.echo("āœ… Configuration validation passed") except Exception as e: click.echo(f"āŒ Configuration validation failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='config-help') @click.argument('key', required=False) @pass_config def config_help(config, key): """Get help information for configuration keys. Provides detailed information about available configuration options, their purposes, and example values. Examples: markitect config-help markitect config-help gitea_url markitect config-help api_token """ try: config_manager = ConfigurationManager() help_text = config_manager.get_config_help(key) click.echo(help_text) except Exception as e: click.echo(f"āŒ Failed to get configuration help: {e}", err=True) sys.exit(1) # Import PluginType for the CLI commands from .plugins.base import PluginType # Plugin Management Commands @cli.command(name='plugin-list') @click.option('--type', 'plugin_type', type=click.Choice([pt.value for pt in PluginType]), help='Filter by plugin type') @click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format') @pass_config def plugin_list(config, plugin_type, output_format): """List all available plugins. Shows discovered and loaded plugins with their metadata and status. Examples: markitect plugin-list markitect plugin-list --type processor markitect plugin-list --format json """ try: from .plugins import PluginManager, PluginType manager = PluginManager() manager.discover_plugins() # Filter by type if specified filter_type = None if plugin_type: filter_type = PluginType(plugin_type) plugins = manager.list_plugins(filter_type) if output_format == 'table': if not plugins: click.echo("No plugins found.") return # Create table output click.echo("šŸ“¦ Available Plugins:") click.echo() for name, info in plugins.items(): status = "āœ… Loaded" if info.get('loaded', False) else "⚪ Available" click.echo(f"{status} {name}") click.echo(f" Type: {info.get('type', 'unknown')}") click.echo(f" Version: {info.get('version', 'unknown')}") click.echo(f" Description: {info.get('description', 'No description')}") if info.get('author'): click.echo(f" Author: {info['author']}") click.echo() else: click.echo(format_output(plugins, output_format)) except Exception as e: click.echo(f"āŒ Failed to list plugins: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='plugin-load') @click.argument('plugin_name', type=str) @click.option('--config-data', type=str, help='JSON configuration data for plugin') @pass_config def plugin_load(config, plugin_name, config_data): """Load a specific plugin. Load and initialize a plugin with optional configuration. Examples: markitect plugin-load json_formatter markitect plugin-load my_processor --config-data '{"param": "value"}' """ try: from .plugins import PluginManager import json manager = PluginManager() # Parse config data if provided plugin_config = {} if config_data: try: plugin_config = json.loads(config_data) except json.JSONDecodeError as e: click.echo(f"āŒ Invalid JSON in config-data: {e}", err=True) sys.exit(1) if config.get('verbose'): click.echo(f"šŸ” Attempting to load plugin '{plugin_name}'...") plugin = manager.load_plugin(plugin_name, plugin_config) if plugin: click.echo(f"āœ… Plugin '{plugin_name}' loaded successfully") click.echo(f" Type: {plugin.metadata.plugin_type.value}") click.echo(f" Version: {plugin.metadata.version}") else: click.echo(f"āŒ Failed to load plugin '{plugin_name}'", err=True) if config.get('verbose'): # Additional debug info discovered = manager.discover_plugins() if plugin_name in discovered: click.echo(f" Plugin found in discovery: {discovered[plugin_name]}") else: click.echo(f" Plugin not found in discovery. Available: {list(discovered.keys())[:5]}") sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to load plugin: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='plugin-unload') @click.argument('plugin_name', type=str) @pass_config def plugin_unload(config, plugin_name): """Unload a plugin. Unload and cleanup a previously loaded plugin. Examples: markitect plugin-unload json_formatter """ try: from .plugins import PluginManager manager = PluginManager() if manager.unload_plugin(plugin_name): click.echo(f"āœ… Plugin '{plugin_name}' unloaded successfully") else: click.echo(f"āŒ Plugin '{plugin_name}' not found or not loaded", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to unload plugin: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='plugin-info') @click.argument('plugin_name', type=str) @click.option('--format', 'output_format', type=click.Choice(['simple', 'json', 'yaml']), default='simple', help='Output format') @pass_config def plugin_info(config, plugin_name, output_format): """Show detailed information about a plugin. Display comprehensive information about a specific plugin including metadata, configuration, and status. Examples: markitect plugin-info json_formatter markitect plugin-info my_processor --format json """ try: from .plugins import PluginManager manager = PluginManager() manager.discover_plugins() plugins = manager.list_plugins() if plugin_name not in plugins: click.echo(f"āŒ Plugin '{plugin_name}' not found", err=True) sys.exit(1) plugin_info = plugins[plugin_name] if output_format == 'simple': click.echo(f"šŸ“¦ Plugin: {plugin_name}") click.echo(f" Name: {plugin_info.get('name', 'N/A')}") click.echo(f" Version: {plugin_info.get('version', 'N/A')}") click.echo(f" Type: {plugin_info.get('type', 'N/A')}") click.echo(f" Description: {plugin_info.get('description', 'N/A')}") click.echo(f" Author: {plugin_info.get('author', 'N/A')}") click.echo(f" Status: {'Loaded' if plugin_info.get('loaded', False) else 'Available'}") if plugin_info.get('dependencies'): click.echo(f" Dependencies: {', '.join(plugin_info['dependencies'])}") if plugin_info.get('markitect_version'): click.echo(f" MarkiTect Version: {plugin_info['markitect_version']}") else: click.echo(format_output(plugin_info, output_format)) except Exception as e: click.echo(f"āŒ Failed to get plugin info: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='plugin-discover') @click.option('--refresh', is_flag=True, help='Force refresh of plugin discovery') @pass_config def plugin_discover(config, refresh): """Discover available plugins. Scan for plugins in configured directories and report findings. Examples: markitect plugin-discover markitect plugin-discover --refresh """ try: from .plugins import PluginManager manager = PluginManager() discovered = manager.discover_plugins(refresh=refresh) click.echo(f"šŸ” Plugin Discovery Complete") click.echo(f" Found {len(discovered)} plugins") if discovered: click.echo(" Discovered plugins:") for name in sorted(discovered.keys()): click.echo(f" • {name}") else: click.echo(" No plugins found") except Exception as e: click.echo(f"āŒ Failed to discover plugins: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # GraphQL Commands @cli.command(name='graphql-serve') @click.option('--host', default='127.0.0.1', help='Host to bind to') @click.option('--port', default=5000, type=int, help='Port to bind to') @click.option('--debug', is_flag=True, help='Enable debug mode') @click.option('--no-cors', is_flag=True, help='Disable CORS') @pass_config def graphql_serve(config, host, port, debug, no_cors): """Start GraphQL server for MarkiTect API. Starts a GraphQL server that exposes MarkiTect's database content including markdown files, schemas, and ASTs through a GraphQL interface. Examples: markitect graphql-serve markitect graphql-serve --host 0.0.0.0 --port 8000 markitect graphql-serve --debug --no-cors """ try: from .graphql import GraphQLServer server = GraphQLServer( db_path=config['database_path'], enable_cors=not no_cors ) server.run(host=host, port=port, debug=debug) except ImportError: click.echo("āŒ GraphQL server requires additional dependencies.", err=True) click.echo("Install with: pip install flask flask-cors", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to start GraphQL server: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='graphql-query') @click.argument('query', type=str) @click.option('--variables', type=str, help='JSON variables for the query') @click.option('--endpoint', default='http://localhost:5000/graphql', help='GraphQL endpoint URL') @click.option('--local', is_flag=True, help='Execute query locally without HTTP') @click.option('--format', 'output_format', type=click.Choice(['json', 'yaml', 'table']), default='json', help='Output format') @pass_config def graphql_query(config, query, variables, endpoint, local, output_format): """Execute GraphQL query against MarkiTect API. Execute GraphQL queries to retrieve data from MarkiTect's database. Can run against a local server or execute queries directly. Examples: markitect graphql-query "{ markdownFiles { id filename } }" markitect graphql-query "query GetFile($id: Int!) { markdownFile(id: $id) { filename content } }" --variables '{"id": 1}' markitect graphql-query "{ databaseStats { totalFiles totalSchemas } }" --local """ try: from .graphql import GraphQLClient import json as json_module # Parse variables if provided parsed_variables = {} if variables: try: parsed_variables = json_module.loads(variables) except json_module.JSONDecodeError as e: click.echo(f"āŒ Invalid JSON in variables: {e}", err=True) sys.exit(1) # Execute query if local: client = GraphQLClient() result = client.execute_local(query, parsed_variables) else: client = GraphQLClient(endpoint) result = client.execute(query, parsed_variables) # Format output if result.get('errors'): click.echo("āŒ GraphQL Errors:", err=True) for error in result['errors']: click.echo(f" {error.get('message', str(error))}", err=True) if result.get('data'): if output_format == 'json': click.echo(json_module.dumps(result['data'], indent=2)) elif output_format == 'yaml': import yaml click.echo(yaml.dump(result['data'], default_flow_style=False)) elif output_format == 'table': # Simple table format for basic data click.echo(format_output(result['data'], 'table')) except ImportError as e: if 'requests' in str(e): click.echo("āŒ GraphQL client requires requests library.", err=True) click.echo("Install with: pip install requests", err=True) else: click.echo(f"āŒ Missing dependency: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to execute GraphQL query: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='graphql-schema') @click.option('--endpoint', default='http://localhost:5000/graphql', help='GraphQL endpoint URL') @click.option('--local', is_flag=True, help='Get schema locally without HTTP') @click.option('--format', 'output_format', type=click.Choice(['sdl', 'json']), default='sdl', help='Schema format (SDL or introspection JSON)') @pass_config def graphql_schema(config, endpoint, local, output_format): """Get GraphQL schema definition. Retrieve and display the GraphQL schema for MarkiTect's API. Useful for understanding available queries and types. Examples: markitect graphql-schema markitect graphql-schema --local markitect graphql-schema --format json """ try: if local: from .graphql import schema try: from graphql.utilities import print_schema schema_sdl = print_schema(schema.graphql_schema) except (AttributeError, ImportError): # Fallback to simple string representation schema_sdl = str(schema) if output_format == 'sdl': click.echo(schema_sdl) else: # For JSON, we'd need introspection query introspection_query = """ query IntrospectionQuery { __schema { queryType { name } mutationType { name } subscriptionType { name } types { ...FullType } } } fragment FullType on __Type { kind name description fields(includeDeprecated: true) { name description args { ...InputValue } type { ...TypeRef } } } fragment InputValue on __InputValue { name description type { ...TypeRef } defaultValue } fragment TypeRef on __Type { kind name ofType { kind name ofType { kind name } } } """ from .graphql import GraphQLClient client = GraphQLClient() result = client.execute_local(introspection_query) import json click.echo(json.dumps(result, indent=2)) else: # Get from HTTP endpoint import requests if output_format == 'sdl': # Try to get SDL from /schema endpoint try: response = requests.get(endpoint.replace('/graphql', '/schema')) if response.status_code == 200: schema_data = response.json() click.echo(schema_data.get('schema', 'Schema not available')) else: click.echo(f"āŒ Failed to get schema: HTTP {response.status_code}", err=True) sys.exit(1) except requests.RequestException as e: click.echo(f"āŒ Failed to connect to GraphQL server: {e}", err=True) sys.exit(1) else: # Get introspection JSON from .graphql import GraphQLClient client = GraphQLClient(endpoint) # Use introspection query (same as above) result = client.execute(introspection_query) import json click.echo(json.dumps(result, indent=2)) except ImportError as e: click.echo(f"āŒ Missing dependency: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to get GraphQL schema: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command(name='graphql-examples') @pass_config def graphql_examples(config): """Show example GraphQL queries for MarkiTect. Display common GraphQL query examples to help users get started with querying MarkiTect's data through the GraphQL interface. """ examples = """ šŸ“Š MarkiTect GraphQL Query Examples =================================== šŸ” Basic Queries: # Get all markdown files with basic info { markdownFiles(limit: 10) { id filename hasFrontMatter wordCount createdAt } } # Get specific file by ID { markdownFile(id: 1) { filename content frontMatter { key value } wordCount lineCount } } # Get file by filename { markdownFile(filename: "docs/README.md") { id content frontMatterRaw } } šŸ“‹ Schema Queries: # List all schemas { schemas { id filename title description schemaVersion propertyCount } } # Get specific schema { schema(id: 1) { filename title schemaContent createdAt } } 🌳 AST Queries: # Get AST for a file { ast(fileId: 1) { filename headingCount linkCount imageCount codeBlockCount tree { type value level } } } # JSONPath query on AST { astQuery(fileId: 1, jsonpath: "$..heading") { # Returns array of heading nodes } } šŸ” Search Queries: # Search across files and schemas { search(query: "markdown", limit: 5) { type score highlight file { filename wordCount } schema { title description } } } # Search only files { search(query: "README", type: "file") { type score file { filename content } } } šŸ“Š Statistics: # Database overview { databaseStats { totalFiles totalSchemas totalSizeBytes lastUpdated } } šŸ” Filtering: # Files with front matter created after date { markdownFiles( hasFrontMatter: true, createdAfter: "2023-01-01T00:00:00" ) { filename frontMatterRaw createdAt } } šŸ’” Usage Examples: # Execute a query locally markitect graphql-query "{ databaseStats { totalFiles } }" --local # Execute with variables markitect graphql-query \\ "query GetFile($id: Int!) { markdownFile(id: $id) { filename } }" \\ --variables '{"id": 1}' --local # Start GraphQL server markitect graphql-serve --port 5000 # Query running server markitect graphql-query "{ markdownFiles { filename } }" \\ --endpoint http://localhost:5000/graphql šŸ› ļø Mutation Examples (Write Operations): # Add a new markdown file markitect graphql-mutate \\ 'mutation { addMarkdownFile(filename: "new-doc.md", content: "# New Document\\n\\nContent here") { success markdownFile { id filename } errors } }' \\ --local # Update existing file markitect graphql-mutate \\ 'mutation { updateMarkdownFile(id: 1, content: "# Updated\\n\\nNew content") { success errors } }' \\ --local # Add a JSON schema markitect graphql-mutate \\ 'mutation { addSchema(filename: "user-schema.json", schemaContent: "{\\"type\\": \\"object\\", \\"properties\\": {\\"name\\": {\\"type\\": \\"string\\"}}}") { success schema { id title } errors } }' \\ --local # Delete a schema markitect graphql-mutate \\ 'mutation { deleteSchema(filename: "old-schema.json") { success deletedFilename errors } }' \\ --local šŸ’” Access GraphQL Playground at http://localhost:5000/graphql when server is running """ click.echo(examples) @cli.command(name='graphql-mutate') @click.argument('mutation', required=True) @click.option('--variables', default='{}', help='JSON string of mutation variables') @click.option('--endpoint', default='http://localhost:5000/graphql', help='GraphQL endpoint URL') @click.option('--local', is_flag=True, help='Execute mutation locally without HTTP') @click.option('--format', 'output_format', type=click.Choice(['json', 'yaml', 'table']), default='json', help='Output format') @pass_config def graphql_mutate(config, mutation, variables, endpoint, local, output_format): """Execute GraphQL mutations for write operations. Execute GraphQL mutations to add, update, or delete data in MarkiTect. Supports both local and remote execution modes. Examples: # Add a new markdown file locally markitect graphql-mutate 'mutation { addMarkdownFile(filename: "test.md", content: "# Test\\nContent") { success markdown_file { id filename } } }' --local # Update an existing file markitect graphql-mutate 'mutation { updateMarkdownFile(id: 1, content: "# Updated\\nContent") { success errors } }' --local # Add a schema with variables markitect graphql-mutate 'mutation($filename: String!, $content: JSONString!) { addSchema(filename: $filename, schemaContent: $content) { success schema { id title } } }' --variables '{"filename": "test.json", "content": "{\\"type\\": \\"object\\"}"}' --local """ import sys import json import yaml try: # Parse variables try: mutation_variables = json.loads(variables) if variables != '{}' else {} except json.JSONDecodeError: click.echo("āŒ Invalid JSON in variables parameter", err=True) sys.exit(1) if local: # Execute locally using the GraphQL client try: from .graphql import GraphQLClient client = GraphQLClient() result = client.execute_local(mutation, variables=mutation_variables) except ImportError as e: click.echo(f"āŒ Missing dependency: {e}", err=True) sys.exit(1) else: # Execute remotely try: from .graphql import GraphQLClient client = GraphQLClient(endpoint) result = client.execute(mutation, variables=mutation_variables) except ImportError as e: click.echo(f"āŒ Missing dependency: {e}", err=True) sys.exit(1) # Format and display output if output_format == 'json': click.echo(json.dumps(result, indent=2)) elif output_format == 'yaml': click.echo(yaml.dump(result, default_flow_style=False)) elif output_format == 'table': # For mutations, simple table output if result.get('data'): click.echo("Mutation Result:") for key, value in result['data'].items(): if isinstance(value, dict): click.echo(f" {key}:") for sub_key, sub_value in value.items(): click.echo(f" {sub_key}: {sub_value}") else: click.echo(f" {key}: {value}") if result.get('errors'): click.echo("Errors:") for error in result['errors']: click.echo(f" - {error.get('message', error)}") except ImportError as e: click.echo(f"āŒ Missing dependency: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to execute mutation: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # ============================================================================= # Full Text Search Commands (Issue #83) # ============================================================================= @cli.group('search') @pass_config def search_group(config): """Full text search operations using FTS5.""" pass @search_group.command('init') @click.option('--rebuild', is_flag=True, help='Rebuild existing indexes') @pass_config def search_init(config, rebuild): """Initialize full text search indexes.""" db_path = get_database_path(config) try: from .plugins.builtin.search import FTSSearchPlugin search_plugin = FTSSearchPlugin() search_plugin.initialize(db_path) if rebuild: click.echo("šŸ”„ Rebuilding search indexes...") stats = search_plugin.rebuild_index(db_path) click.echo(f"āœ… Indexed {stats.get('files_indexed', 0)} files and {stats.get('schemas_indexed', 0)} schemas") if 'error' in stats: click.echo(f"āš ļø Warning: {stats['error']}", err=True) else: click.echo("āœ… Search indexes initialized") # Show status search_stats = search_plugin.get_search_stats(db_path) if search_stats.get('fts_enabled'): click.echo(f"šŸ“Š FTS5 enabled with {len(search_stats.get('fts_tables', []))} tables") else: click.echo("āš ļø FTS5 not available, will fall back to simple search") except ImportError as e: click.echo(f"āŒ Search plugin not available: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to initialize search: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @search_group.command('query') @click.argument('query') @click.option('--type', 'content_type', default='all', type=click.Choice(['all', 'files', 'schemas']), help='Content type to search') @click.option('--limit', default=20, help='Maximum number of results') @click.option('--offset', default=0, help='Result offset for pagination') @click.option('--format', 'output_format', default='table', type=click.Choice(['json', 'yaml', 'table']), help='Output format') @click.option('--no-highlight', is_flag=True, help='Disable result highlighting') @pass_config def search_query(config, query, content_type, limit, offset, output_format, no_highlight): """Perform full text search query.""" db_path = get_database_path(config) try: from .plugins.builtin.search import FTSSearchPlugin search_plugin = FTSSearchPlugin() results = search_plugin.search(db_path, query, content_type, limit, offset) if output_format == 'json': click.echo(json.dumps(results, indent=2, default=str)) elif output_format == 'yaml': click.echo(yaml.dump(results, default_flow_style=False)) else: # Table format if not results: click.echo(f"No results found for '{query}'") return # Prepare table data table_data = [] headers = ['Score', 'Type', 'File/Schema', 'Preview'] for result in results: score = f"{result.get('score', 0):.2f}" result_type = result.get('type', 'unknown') if result_type == 'file': file_info = result.get('file', {}) name = file_info.get('filename', 'Unknown') if not no_highlight: preview = result.get('highlight', '')[:80] else: content = file_info.get('content', '') preview = content[:80] + '...' if len(content) > 80 else content elif result_type == 'schema': schema_info = result.get('schema', {}) name = schema_info.get('filename', 'Unknown') if not no_highlight: preview = result.get('highlight', '')[:80] else: desc = schema_info.get('description', '') preview = desc[:80] + '...' if len(desc) > 80 else desc else: name = 'Unknown' preview = '' table_data.append([score, result_type, name, preview]) click.echo(f"\nšŸ” Found {len(results)} results for '{query}':\n") click.echo(tabulate(table_data, headers=headers, tablefmt='grid')) if len(results) == limit: click.echo(f"\nšŸ’” Showing first {limit} results. Use --limit and --offset for more.") except ImportError as e: click.echo(f"āŒ Search plugin not available: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Search failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @search_group.command('status') @click.option('--format', 'output_format', default='table', type=click.Choice(['json', 'yaml', 'table']), help='Output format') @pass_config def search_status(config, output_format): """Show search index status and statistics.""" db_path = get_database_path(config) try: from .plugins.builtin.search import FTSSearchPlugin search_plugin = FTSSearchPlugin() stats = search_plugin.get_search_stats(db_path) if output_format == 'json': click.echo(json.dumps(stats, indent=2)) elif output_format == 'yaml': click.echo(yaml.dump(stats, default_flow_style=False)) else: # Table format click.echo("šŸ“Š Search Index Status\n") if stats.get('fts_enabled'): click.echo("āœ… FTS5 Full Text Search: Enabled") # Show table information if stats.get('fts_tables'): click.echo(f"šŸ“‹ FTS Tables: {', '.join(stats['fts_tables'])}") # Show document counts for key, value in stats.items(): if key.endswith('_documents'): table_name = key.replace('_documents', '') click.echo(f"šŸ“„ {table_name}: {value} documents") else: click.echo("āŒ FTS5 Full Text Search: Disabled") if 'error' in stats: click.echo(f" Error: {stats['error']}") click.echo(" Falling back to simple LIKE-based search") # Additional index info from .plugins.builtin.search import SearchIndexer indexer = SearchIndexer() index_info = indexer.get_index_info(db_path) if index_info.get('integrity_check'): status = "āœ…" if index_info['integrity_check'] == 'passed' else "āŒ" click.echo(f"{status} Index Integrity: {index_info['integrity_check']}") except ImportError as e: click.echo(f"āŒ Search plugin not available: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Failed to get search status: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @search_group.command('rebuild') @click.option('--optimize', is_flag=True, help='Optimize indexes after rebuild') @pass_config def search_rebuild(config, optimize): """Rebuild search indexes from scratch.""" db_path = get_database_path(config) try: from .plugins.builtin.search import FTSSearchPlugin, SearchIndexer click.echo("šŸ”„ Rebuilding search indexes...") search_plugin = FTSSearchPlugin() stats = search_plugin.rebuild_index(db_path) if 'error' in stats: click.echo(f"āŒ Rebuild failed: {stats['error']}", err=True) sys.exit(1) click.echo(f"āœ… Rebuilt indexes successfully") click.echo(f"šŸ“„ Files indexed: {stats.get('files_indexed', 0)}") click.echo(f"šŸ“‹ Schemas indexed: {stats.get('schemas_indexed', 0)}") if optimize: click.echo("šŸ”§ Optimizing indexes...") indexer = SearchIndexer() indexer.optimize_index(db_path) click.echo("āœ… Indexes optimized") except ImportError as e: click.echo(f"āŒ Search plugin not available: {e}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Rebuild failed: {e}", err=True) if config.get('verbose'): import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) # Register search commands cli.add_command(search_group) # ============================================================================= # Feature Wishlist Commands (Issue #85) # ============================================================================= @cli.group('wish') @pass_config def wishlist_group(config): """Feature wishlist management for capturing and refining ideas.""" pass @wishlist_group.command('create') @click.argument('title') @click.option('--description', '-d', help='Wish description') @click.option('--stage', default='discussion', type=click.Choice(['discussion', 'draft', 'ready']), help='Initial wishlist stage') @click.option('--priority', type=click.Choice(['low', 'medium', 'high']), help='Priority level for the wish') @pass_config def wish_create(config, title, description, stage, priority): """Create a new feature wishlist item.""" try: # Build description with template wish_description = f"""## šŸ’” Feature Wish **Summary**: {description or 'No description provided'} ## Current Thinking *What's the initial idea or inspiration?* ## Potential Benefits *Why might this be valuable?* ## Questions to Explore *What aspects need more thought?* ## Related Concepts *Are there similar ideas or existing features this relates to?* --- šŸ“‹ **Wishlist Stage**: {stage} šŸ·ļø **Auto-labeled**: `wish`, `wish/{stage}` """ # Prepare labels labels = ['wish', f'wish/{stage}'] if priority: labels.append(f'priority/{priority}') # Create the issue import subprocess result = subprocess.run([ 'tea', 'issue', 'create', '--title', f"šŸ’” Wish: {title}", '--description', wish_description, '--labels', ','.join(labels) ], capture_output=True, text=True) if result.returncode == 0: click.echo(f"āœ… Created wishlist item: '{title}'") click.echo(f"šŸ·ļø Labels: {', '.join(labels)}") click.echo(f"šŸ“‹ Stage: {stage}") else: click.echo(f"āŒ Failed to create wish: {result.stderr}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Error creating wish: {e}", err=True) sys.exit(1) @wishlist_group.command('list') @click.option('--stage', type=click.Choice(['discussion', 'draft', 'ready', 'archived', 'all']), default='all', help='Filter by wishlist stage') @click.option('--format', 'output_format', default='table', type=click.Choice(['table', 'simple', 'json']), help='Output format') @pass_config def wish_list(config, stage, output_format): """List feature wishlist items.""" try: # Build label filter if stage == 'all': label_filter = 'wish' else: label_filter = f'wish/{stage}' # Get issues with wish labels using simple output and parse manually import subprocess result = subprocess.run([ 'tea', 'issue', 'list', '--labels', label_filter, '--output', 'simple' ], capture_output=True, text=True) if result.returncode != 0: click.echo(f"āŒ Failed to fetch wishlist: {result.stderr}", err=True) sys.exit(1) # Parse the simple output: number title status assignee labels issues = [] if result.stdout.strip(): lines = result.stdout.strip().split('\n') for line in lines: if line.strip(): # Parse: number title status assignee labels parts = line.split() if len(parts) >= 2: issue_number = parts[0].strip() # Find where title ends (before status/assignee) title_parts = [] labels = [] collecting_title = True for part in parts[1:]: if collecting_title and part not in ['open', 'closed'] and not part.startswith('wish'): title_parts.append(part) else: collecting_title = False if part.startswith('wish'): labels.append(part) title = ' '.join(title_parts) created_at = "2025-10-03" # Default for simple format issues.append({ 'number': int(issue_number), 'title': title, 'labels': [{'name': label} for label in labels], 'created_at': created_at }) if not issues: click.echo(f"No wishlist items found for stage: {stage}") return if output_format == 'json': click.echo(json.dumps(issues, indent=2)) elif output_format == 'simple': for issue in issues: click.echo(f"#{issue['number']}: {issue['title']}") else: # Table format table_data = [] for issue in issues: # Extract stage from labels stage_labels = [label['name'] for label in issue.get('labels', []) if label['name'].startswith('wish/')] current_stage = stage_labels[0].replace('wish/', '') if stage_labels else 'unknown' table_data.append([ f"#{issue['number']}", current_stage, issue['title'].replace('šŸ’” Wish: ', ''), issue['created_at'][:10] # Just the date ]) from tabulate import tabulate headers = ['Issue', 'Stage', 'Title', 'Created'] click.echo(f"\nšŸ’” Feature Wishlist ({len(issues)} items):\n") click.echo(tabulate(table_data, headers=headers, tablefmt='grid')) except Exception as e: click.echo(f"āŒ Error listing wishes: {e}", err=True) sys.exit(1) @wishlist_group.command('promote') @click.argument('issue_number', type=int) @click.option('--stage', type=click.Choice(['draft', 'ready', 'archived']), help='Promote to specific stage') @pass_config def wish_promote(config, issue_number, stage): """Promote a wishlist item to the next stage or specific stage.""" try: if not stage: # Auto-determine next stage click.echo("šŸ”„ Auto-promoting to next logical stage...") # This would require fetching current labels and determining next stage stage = 'draft' # Default progression # Remove old stage labels and add new one import subprocess # Get current issue info using the list format and extract labels for our issue result = subprocess.run([ 'tea', 'issue', 'list', '--output', 'simple' ], capture_output=True, text=True) if result.returncode != 0: click.echo(f"āŒ Failed to fetch issues", err=True) sys.exit(1) # Parse output to find our issue and extract its current labels current_labels = [] if result.stdout.strip(): lines = result.stdout.strip().split('\n') for line in lines: if line.strip() and line.split()[0].strip() == str(issue_number): # Found our issue, extract labels from the end of the line parts = line.split() for part in parts: if part.startswith('wish'): current_labels.append(part) break if not current_labels: click.echo(f"āŒ Issue #{issue_number} not found or has no wish labels", err=True) sys.exit(1) # Build new labels (remove old wish/ stage labels) new_labels = [label for label in current_labels if not label.startswith('wish/')] new_labels.append(f'wish/{stage}') # Update labels result = subprocess.run([ 'tea', 'issue', 'edit', str(issue_number), '--labels', ','.join(new_labels) ], capture_output=True, text=True) if result.returncode == 0: click.echo(f"āœ… Promoted wish #{issue_number} to stage: {stage}") click.echo(f"šŸ·ļø Updated labels: {', '.join(new_labels)}") else: click.echo(f"āŒ Failed to promote wish: {result.stderr}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Error promoting wish: {e}", err=True) sys.exit(1) @wishlist_group.command('convert') @click.argument('issue_number', type=int) @click.option('--title', help='New title for the regular issue') @click.option('--copy-content', is_flag=True, default=True, help='Copy wishlist content to new issue') @pass_config def wish_convert(config, issue_number, title, copy_content): """Convert a ready wishlist item to a regular issue.""" try: import subprocess import json from datetime import datetime # Get the wishlist issue result = subprocess.run([ 'tea', 'issue', 'view', str(issue_number), '--output', 'json' ], capture_output=True, text=True) if result.returncode != 0: click.echo(f"āŒ Failed to fetch wish #{issue_number}", err=True) sys.exit(1) wish_issue = json.loads(result.stdout) # Check if it's ready for conversion labels = [label['name'] for label in wish_issue.get('labels', [])] if 'wish/ready' not in labels: click.echo(f"āš ļø Wish #{issue_number} is not marked as 'ready'. Current stage: {[l for l in labels if l.startswith('wish/')]}") if not click.confirm('Convert anyway?'): return # Prepare new issue content new_title = title or wish_issue['title'].replace('šŸ’” Wish: ', '') if copy_content: new_description = f"""## Converted from Wishlist Originally tracked as wishlist item #{issue_number}. ## Description {wish_issue.get('body', '')} --- *Converted from feature wishlist on {datetime.now().strftime('%Y-%m-%d')}* """ else: new_description = f"Converted from wishlist item #{issue_number}" # Create new regular issue result = subprocess.run([ 'tea', 'issue', 'create', '--title', new_title, '--description', new_description ], capture_output=True, text=True) if result.returncode == 0: # Close the wishlist item with reference to new issue # Extract issue number from tea output (usually shows the URL) lines = result.stdout.strip().split('\n') new_issue_url = lines[-1] if lines else "" new_issue_number = new_issue_url.split('/')[-1] if '/' in new_issue_url else "unknown" # Close wishlist item subprocess.run([ 'tea', 'issue', 'close', str(issue_number), '--comment', f'Converted to regular issue #{new_issue_number}' ], capture_output=True, text=True) click.echo(f"āœ… Converted wish #{issue_number} to regular issue #{new_issue_number}") click.echo(f"šŸ·ļø Original wishlist item closed") click.echo(f"šŸ“‹ New issue: {new_title}") else: click.echo(f"āŒ Failed to create new issue: {result.stderr}", err=True) sys.exit(1) except Exception as e: click.echo(f"āŒ Error converting wish: {e}", err=True) sys.exit(1) # Register wishlist commands cli.add_command(wishlist_group) # Register issue management commands # Register worktime tracking commands from markitect.finance.worktime_commands import worktime as worktime_group cli.add_command(worktime_group) # Query Paradigm Commands - Issue #62 @click.group() def paradigms(): """Discover and explore different query paradigms in MarkiTect.""" pass @paradigms.command('list') @pass_config def list_paradigms(config): """List all available query paradigms.""" from .query_paradigms.registry import registry paradigms = registry.list_all() print(f"šŸ“š MarkiTect Query Paradigms ({len(paradigms)} available)") print("=" * 50) # Group by category categories = {} for paradigm in paradigms: if paradigm.category not in categories: categories[paradigm.category] = [] categories[paradigm.category].append(paradigm) for category, paradigm_list in categories.items(): print(f"\nšŸ·ļø {category.upper()} PARADIGMS") print("-" * 30) for paradigm in paradigm_list: status = "āœ… IMPLEMENTED" if paradigm.name in ['SQL', 'FTS', 'GraphQL', 'JSONPath', 'Natural Language'] else "šŸ“‹ DOCUMENTED" print(f" {status} {paradigm.name} ({paradigm.complexity})") print(f" {paradigm.description}") print() @paradigms.command() @click.argument('query') @pass_config def search(config, query): """Search paradigms by name or description.""" from .query_paradigms.registry import registry results = registry.search_paradigms(query) if not results: print(f"āŒ No paradigms found matching '{query}'") return print(f"šŸ” Search results for '{query}' ({len(results)} found)") print("=" * 40) for paradigm in results: status = "āœ… IMPLEMENTED" if paradigm.name in ['SQL', 'FTS', 'GraphQL', 'JSONPath', 'Natural Language'] else "šŸ“‹ DOCUMENTED" print(f" {status} {paradigm.name} ({paradigm.category}, {paradigm.complexity})") print(f" {paradigm.description}") print() @paradigms.command() @click.argument('name') @pass_config def show(config, name): """Show detailed information about a specific paradigm.""" from .query_paradigms.registry import registry paradigm = registry.get(name) if not paradigm: print(f"āŒ Paradigm '{name}' not found.") print("\nAvailable paradigms:") for p in registry.list_all(): print(f" - {p.name}") return status = "āœ… IMPLEMENTED" if paradigm.name in ['SQL', 'FTS', 'GraphQL', 'JSONPath', 'Natural Language'] else "šŸ“‹ DOCUMENTED" print(f"šŸ” {paradigm.name} Query Paradigm") print("=" * (len(paradigm.name) + 20)) print(f"Status: {status}") print(f"Category: {paradigm.category}") print(f"Complexity: {paradigm.complexity}") print(f"Description: {paradigm.description}") print() print("šŸ“ Syntax Help:") print("-" * 15) print(paradigm.get_syntax_help()) print() print("šŸ’” Examples:") print("-" * 12) examples = paradigm.get_examples() for i, example in enumerate(examples, 1): print(f"{i}. {example['name']}") print(f" {example['description']}") print(f" Query: {example['query']}") print() @paradigms.command() @click.argument('paradigm_name') @click.argument('query') @click.option('--config-data', type=str, help='JSON configuration for the query') @pass_config def exec(config, paradigm_name, query, config_data): """Execute a query using specified paradigm.""" from .query_paradigms.registry import registry import json paradigm = registry.get(paradigm_name) if not paradigm: print(f"āŒ Paradigm '{paradigm_name}' not found.") return # Parse config if provided query_config = {} if config_data: try: query_config = json.loads(config_data) except json.JSONDecodeError: print("āŒ Invalid JSON in config-data parameter") return # Add database path from global config db_path = get_database_path(config) query_config['db_path'] = db_path # Validate query first valid, error = paradigm.validate_query(query) if not valid: print(f"āŒ Invalid query: {error}") return print(f"šŸš€ Executing {paradigm.name} query...") print(f"Query: {query}") if config_data: print(f"Config: {query_config}") print() try: result = paradigm.execute(query, query_config) print(f"ā±ļø Execution time: {result.execution_time_ms:.2f}ms") print(f"šŸ“Š Result count: {result.result_count}") print(f"āœ… Success: {result.success}") if result.error_message: print(f"āŒ Error: {result.error_message}") if result.metadata: print("\nšŸ“‹ Metadata:") for key, value in result.metadata.items(): print(f" {key}: {value}") if result.results: print(f"\nšŸ“„ Results:") for i, row in enumerate(result.results[:5], 1): # Show first 5 results print(f" {i}. {row}") if len(result.results) > 5: print(f" ... and {len(result.results) - 5} more results") except Exception as e: print(f"āŒ Execution error: {e}") @paradigms.command() @pass_config def categories(config): """List all available paradigm categories.""" from .query_paradigms.registry import registry categories = registry.get_categories() print("šŸ“‚ Available Categories:") for category in sorted(categories): paradigms = registry.list_by_category(category) print(f" {category}: {len(paradigms)} paradigms") # Register cost tracking commands if COST_TRACKING_AVAILABLE: cli.add_command(cost_commands) # Register profile management commands if PROFILE_MANAGEMENT_AVAILABLE: cli.add_command(profile_commands) # Register paradigms commands cli.add_command(paradigms) # Register asset management commands - Issue #143 try: from .asset_commands import asset, package, workspace cli.add_command(asset) cli.add_command(package) cli.add_command(workspace) ASSET_COMMANDS_AVAILABLE = True except ImportError: ASSET_COMMANDS_AVAILABLE = False # Register markdown commands plugin try: from .plugins.builtin.markdown_commands import MarkdownCommandsPlugin plugin_instance = MarkdownCommandsPlugin() plugin_instance.initialize() for command_name, command_func in plugin_instance.get_commands().items(): cli.add_command(command_func, name=command_name) except ImportError: pass # Plugin not available # Make cli function available as main entry point main = cli if __name__ == '__main__': main()