"""
CLI Entry Point and Basic Commands - Issue #12

This module provides the command-line interface for MarkiTect, allowing users
to interact with core functionality through terminal commands.

Commands:
- ingest: Process and store a markdown file
- status: Show processing status and metadata for a file
- get: Serialize a stored file back to markdown
- modify: Add a section or update front matter of a stored file
- query: Run a SQL query against the database
- schema: Show the database schema
- metadata: Display file metadata and front matter
- list: List all stored files and their status
- cache-info / cache-clean / cache-invalidate: AST cache maintenance

Integration with existing components:
- Uses DatabaseManager for file storage and retrieval
- Uses DocumentManager for high-performance document processing
- Maintains performance caching architecture

NOTE: the ``list`` command function rebinds the module-level name ``list``.
Code in this module that needs the builtin must use ``builtins.list``.
"""

import builtins
import json
import os
import sys
import traceback
from ast import literal_eval
from pathlib import Path
from typing import Optional

import click
import yaml
from tabulate import tabulate

from .database import DatabaseManager
from .document_manager import DocumentManager
from .serializer import ASTSerializer
from .cache_service import CacheDirectoryService

# Global options for CLI configuration
pass_config = click.make_pass_decorator(dict, ensure=True)

# Directory (relative to the current working directory) in which ``get`` and
# ``modify`` look for "<file_path>.ast.json" cache files.
_AST_CACHE_DIR = Path('.ast_cache')

# Exceptions treated as "stored front matter could not be parsed".
_FRONT_MATTER_ERRORS = (ValueError, TypeError, SyntaxError)


def _parse_front_matter(raw):
    """
    Parse the front matter representation stored in the database.

    The stored text is parsed with ``ast.literal_eval`` rather than ``eval``:
    front matter originates from user-supplied markdown files, so it must
    never be executed as code. Only Python literals are accepted.

    Args:
        raw: Stored front matter text

    Returns:
        The parsed Python object

    Raises:
        ValueError, TypeError, SyntaxError: If the text is not a valid literal
    """
    return literal_eval(raw)


def _coerce_front_matter_value(value):
    """
    Convert a ``key:value`` command-line value to bool, int, float or str.

    Args:
        value: Already-stripped value text

    Returns:
        ``True``/``False`` for 'true'/'false' (case-insensitive), an int for
        decimal digits, a float for digits containing exactly one dot, and the
        unchanged string otherwise (for example '1.2.3' stays a string).
    """
    lowered = value.lower()
    if lowered in ('true', 'false'):
        return lowered == 'true'
    if value.isdecimal():
        return int(value)
    # Require exactly one dot so version-like strings are not sent to float().
    if value.count('.') == 1 and value.replace('.', '').isdecimal():
        return float(value)
    return value


def _fail(config: Optional[dict], message: str) -> None:
    """
    Report an error on stderr and exit with status 1.

    Must be called from inside an ``except`` block: in verbose mode the
    traceback of the exception currently being handled is printed as well.

    Args:
        config: CLI configuration dictionary (may be empty or None)
        message: Error message to display
    """
    click.echo(message, err=True)
    if config and config.get('verbose'):
        click.echo(traceback.format_exc(), err=True)
    sys.exit(1)


def _require_file_info(config, file_path):
    """
    Fetch a file record from the database, exiting with status 1 if absent.

    Args:
        config: CLI configuration dictionary holding 'db_manager'
        file_path: Name of the file to look up

    Returns:
        The record returned by ``DatabaseManager.get_markdown_file``
    """
    file_info = config['db_manager'].get_markdown_file(file_path)
    if not file_info:
        click.echo(f"File not found in database: {file_path}", err=True)
        click.echo("Use 'markitect ingest' to process the file first.", err=True)
        sys.exit(1)
    return file_info


def _load_cached_ast(file_path):
    """
    Load the cached AST for a file, exiting with status 1 if it is missing.

    Args:
        file_path: Name of the file whose cache entry should be read

    Returns:
        Tuple ``(cache_path, ast_tokens)`` with the cache file path and the
        decoded JSON content
    """
    cache_path = _AST_CACHE_DIR / f"{file_path}.ast.json"
    if not cache_path.exists():
        click.echo(f"AST cache not found: {cache_path}", err=True)
        click.echo("Try re-ingesting the file to regenerate cache.", err=True)
        sys.exit(1)

    with open(cache_path, 'r', encoding='utf-8') as f:
        return cache_path, json.load(f)


def _format_table(data):
    """
    Render data as a grid table with ``tabulate``.

    Args:
        data: List/tuple of dicts, list/tuple of plain values, a single dict,
            or any other object (rendered with ``str``)

    Returns:
        Formatted string output
    """
    # ``list`` is shadowed by the CLI command below, hence ``builtins.list``.
    if isinstance(data, (builtins.list, tuple)):
        if data and isinstance(data[0], dict):
            # List of dictionaries - columns come from the first row's keys
            headers = sorted(data[0].keys())
            rows = [[item.get(header, '') for header in headers] for item in data]
            return tabulate(rows, headers=headers, tablefmt='grid')
        # List of simple values
        return tabulate([[item] for item in data], headers=['Value'], tablefmt='grid')

    if isinstance(data, dict):
        # Single dictionary - format as key-value table
        rows = [[key, value] for key, value in data.items()]
        return tabulate(rows, headers=['Key', 'Value'], tablefmt='grid')

    # Fallback to string representation
    return str(data)


def format_output(data, output_format):
    """
    Format data according to specified output format.

    Args:
        data: Data to format
        output_format: Format type ('table', 'json', 'yaml'); any other value
            is treated as 'table'

    Returns:
        Formatted string output
    """
    if output_format == 'json':
        return json.dumps(data, indent=2, default=str)
    if output_format == 'yaml':
        return yaml.dump(data, default_flow_style=False, allow_unicode=True)

    try:
        return _format_table(data)
    except Exception as e:
        # Fallback to string if table formatting fails
        return f"Table formatting error: {e}\nData: {str(data)}"


@click.group()
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
@click.option('--config', 'config_file', type=click.Path(exists=True), help='Configuration file path')
@click.option('--database', type=click.Path(), help='Database file path')
@pass_config
def cli(config, verbose, database, config_file):
    """
    MarkiTect - Advanced Markdown engine for structured content.

    Process markdown files with front matter support, AST caching,
    and relational metadata queries.

    Examples:
      markitect ingest document.md    # Process a markdown file
      markitect status document.md    # Check file status
      markitect list                  # List all stored files
    """
    # Store configuration in context
    config['verbose'] = verbose
    config['config_file'] = config_file

    # Determine database path (explicit option wins over the default location)
    if database:
        config['database_path'] = database
    else:
        config['database_path'] = os.path.expanduser('~/.markitect/markitect.db')

    # Initialize database manager and ensure database exists
    try:
        db_manager = DatabaseManager(config['database_path'])
        db_manager.initialize_database()
        config['db_manager'] = db_manager

        if verbose:
            click.echo(f"Using database: {config['database_path']}", err=True)
    except Exception as e:
        click.echo(f"Error initializing database: {e}", err=True)
        sys.exit(1)


@cli.command()
@click.argument('file_path', type=click.Path(exists=True))
@pass_config
def ingest(config, file_path):
    """
    Process and store a markdown file.

    Ingests a markdown file into the MarkiTect system, parsing its content,
    extracting front matter, generating AST cache, and storing metadata
    in the database.

    FILE_PATH: Path to the markdown file to process

    Examples:
      markitect ingest README.md
      markitect ingest docs/guide.md
    """
    try:
        file_path = Path(file_path)

        if config['verbose']:
            click.echo(f"Processing file: {file_path}")

        # Initialize document manager with database manager
        doc_manager = DocumentManager(config['db_manager'])

        # Ingest the file
        result = doc_manager.ingest_file(file_path)

        if config['verbose']:
            click.echo("Processing results:")
            click.echo(f" File: {result['metadata']['filename']}")
            click.echo(f" AST nodes: {len(result['ast'])} nodes")
            click.echo(f" Cache file: {result['ast_cache_path']}")
            click.echo(f" Parse time: {result['parse_time']:.2f}s")
            click.echo(f" Cache time: {result['cache_time']:.2f}s")

        click.echo(f"✓ Successfully ingested: {file_path.name}")

    except FileNotFoundError:
        click.echo(f"Error: File not found: {file_path}", err=True)
        sys.exit(1)
    except PermissionError:
        click.echo(f"Error: Permission denied accessing: {file_path}", err=True)
        sys.exit(1)
    except Exception as e:
        _fail(config, f"Error processing file: {e}")


@cli.command()
@click.argument('file_path', type=str)
@pass_config
def status(config, file_path):
    """
    Show processing status and metadata for a file.

    Displays information about a file's processing status, metadata,
    and front matter content from the database.

    FILE_PATH: Path or name of the file to check

    Examples:
      markitect status README.md
      markitect status docs/guide.md
    """
    try:
        if config['verbose']:
            click.echo(f"Checking status for: {file_path}")

        # Get file information from database
        file_info = config['db_manager'].get_markdown_file(file_path)

        if not file_info:
            click.echo(f"File not found in database: {file_path}")
            click.echo("Use 'markitect ingest' to process the file first.")
            sys.exit(1)

        click.echo(f"File: {file_info['filename']}")
        click.echo("Status: Processed")
        click.echo(f"Created: {file_info['created_at']}")

        raw_front_matter = file_info['front_matter']
        if raw_front_matter:
            try:
                front_matter = _parse_front_matter(raw_front_matter)
            except _FRONT_MATTER_ERRORS:
                click.echo("Front Matter: (parsing error)")
            else:
                if front_matter:
                    click.echo("Front Matter:")
                    for key, value in front_matter.items():
                        click.echo(f" {key}: {value}")

        if config['verbose']:
            content = file_info['content']
            content_preview = content[:200] + "..." if len(content) > 200 else content
            click.echo(f"Content preview: {content_preview}")

    except Exception as e:
        _fail(config, f"Error checking file status: {e}")


@cli.command()
@click.argument('file_path', type=str)
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: stdout)')
@pass_config
def get(config, file_path, output):
    """
    Retrieve and output a processed markdown file.

    Loads the file from the database and AST cache, then serializes it back
    to markdown format. Supports outputting to file or stdout.

    FILE_PATH: Name of the file to retrieve

    Examples:
      markitect get README.md
      markitect get docs/guide.md --output modified_guide.md
    """
    try:
        if config['verbose']:
            # Diagnostics go to stderr: stdout may carry the markdown itself.
            click.echo(f"Retrieving file: {file_path}", err=True)

        file_info = _require_file_info(config, file_path)
        _, ast_tokens = _load_cached_ast(file_path)

        # Parse front matter from database
        front_matter = None
        if file_info.get('front_matter'):
            try:
                front_matter = _parse_front_matter(file_info['front_matter'])
            except _FRONT_MATTER_ERRORS:
                if config['verbose']:
                    click.echo("Warning: Could not parse front matter", err=True)

        # Serialize AST back to markdown
        serializer = ASTSerializer()
        markdown_content = serializer.serialize_to_markdown(ast_tokens, front_matter)

        # Output to file or stdout
        if output:
            output_path = Path(output)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(markdown_content)
            click.echo(f"✓ File written to: {output_path}")
        else:
            click.echo(markdown_content)

        if config['verbose']:
            click.echo(f"Retrieved {len(ast_tokens)} AST tokens", err=True)

    except Exception as e:
        _fail(config, f"Error retrieving file: {e}")


@cli.command()
@click.argument('file_path', type=str)
@click.option('--add-section', type=str, help='Add section with title')
@click.option('--section-content', type=str, default='', help='Content for new section')
@click.option('--section-level', type=int, default=2, help='Heading level for new section (1-6)')
@click.option('--update-front-matter', type=str, help='Update front matter (format: key:value)')
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: overwrite original in cache)')
@pass_config
def modify(config, file_path, add_section, section_content, section_level, update_front_matter, output):
    """
    Modify the content of a processed markdown file.

    Loads the file from cache, applies modifications, and updates the cache
    or outputs to a new file. Supports adding sections and updating front matter.

    FILE_PATH: Name of the file to modify

    Examples:
      markitect modify README.md --add-section "New Section" --section-content "New content"
      markitect modify doc.md --update-front-matter "status:updated"
      markitect modify doc.md --add-section "Notes" --output modified_doc.md
    """
    try:
        if config['verbose']:
            click.echo(f"Modifying file: {file_path}")

        file_info = _require_file_info(config, file_path)
        cache_path, ast_tokens = _load_cached_ast(file_path)

        # Parse front matter from database
        front_matter = {}
        if file_info.get('front_matter'):
            try:
                front_matter = _parse_front_matter(file_info['front_matter']) or {}
            except _FRONT_MATTER_ERRORS:
                if config['verbose']:
                    click.echo("Warning: Could not parse existing front matter", err=True)

        # Prepare modifications
        modifications = {}
        changes_made = []

        # Handle add-section modification
        if add_section:
            modifications['add_section'] = {
                'title': add_section,
                'content': section_content,
                'level': section_level,
            }
            changes_made.append(f"Added section: {add_section}")

        # Handle front matter updates
        if update_front_matter:
            key, separator, raw_value = update_front_matter.partition(':')
            key = key.strip()
            if not separator or not key:
                click.echo("Invalid front matter format. Use 'key:value'", err=True)
                sys.exit(1)
            try:
                value = _coerce_front_matter_value(raw_value.strip())
            except ValueError as e:
                click.echo(f"Error parsing front matter update: {e}", err=True)
                sys.exit(1)
            front_matter[key] = value
            changes_made.append(f"Updated front matter: {key} = {value}")

        if not changes_made:
            click.echo("No modifications specified. Use --add-section or --update-front-matter", err=True)
            sys.exit(1)

        # Apply modifications to AST
        serializer = ASTSerializer()
        if modifications:
            ast_tokens = serializer.modify_ast_content(ast_tokens, modifications)

        # Serialize back to markdown
        markdown_content = serializer.serialize_to_markdown(ast_tokens, front_matter)

        # Handle output
        if output:
            # Write to specified output file
            output_path = Path(output)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(markdown_content)
            click.echo(f"✓ Modified file written to: {output_path}")
        else:
            # Update the cache with the modified AST
            with open(cache_path, 'w', encoding='utf-8') as f:
                json.dump(ast_tokens, f, indent=2, ensure_ascii=False)

            # Persisting front matter would require extending DatabaseManager.
            # Tell the user whenever a requested update was not stored, so the
            # success message below is not misleading.
            if update_front_matter:
                click.echo("Note: Database front matter update not implemented yet", err=True)

            click.echo(f"✓ Modified file updated in cache: {file_path}")

        # Show changes made
        if config['verbose']:
            click.echo("Changes applied:", err=True)
            for change in changes_made:
                click.echo(f" - {change}", err=True)

    except Exception as e:
        _fail(config, f"Error modifying file: {e}")


@cli.command()
@click.argument('sql', type=str)
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
@pass_config
def query(config, sql, format):
    """
    Execute SQL query against the database.

    Execute read-only SQL queries to explore and analyze document metadata.
    Only SELECT and WITH statements are allowed for security.

    SQL: SQL query to execute (SELECT statements only)

    Examples:
      markitect query "SELECT filename, created_at FROM markdown_files"
      markitect query "SELECT COUNT(*) as total FROM markdown_files" --format json
      markitect query "SELECT * FROM markdown_files WHERE filename LIKE '%.md'" --format yaml
    """
    # ``format`` mirrors the --format option name; the builtin is not needed here.
    try:
        if config['verbose']:
            click.echo(f"Executing query: {sql}", err=True)

        # Execute the query
        results = config['db_manager'].execute_query(sql)

        if not results:
            # Structured formats get an empty list; the table format gets a message.
            click.echo('[]' if format in ('json', 'yaml') else "No results found.")
            return

        # Format and display results
        click.echo(format_output(results, format))

        if config['verbose']:
            click.echo(f"Query returned {len(results)} result(s)", err=True)

    except ValueError as e:
        click.echo(f"Query error: {e}", err=True)
        sys.exit(1)
    except Exception as e:
        _fail(config, f"Database error: {e}")


@cli.command()
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
@pass_config
def schema(config, format):
    """
    Show database schema and table structure.

    Display the structure of all tables in the database, including
    column names, types, and constraints.

    Examples:
      markitect schema
      markitect schema --format json
      markitect schema --format yaml
    """
    try:
        if config['verbose']:
            click.echo("Retrieving database schema...", err=True)

        # Get schema information
        schema_info = config['db_manager'].get_schema()

        if not schema_info:
            click.echo("No tables found in database.")
            return

        # Format and display schema
        click.echo(format_output(schema_info, format))

        if config['verbose']:
            click.echo(f"Schema contains {len(schema_info)} table(s)", err=True)

    except Exception as e:
        _fail(config, f"Schema error: {e}")


@cli.command()
@click.argument('file_path', type=str)
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
@pass_config
def metadata(config, file_path, format):
    """
    Display file metadata and front matter.

    Show detailed information about a specific file including its
    front matter, database metadata, and processing information.

    FILE_PATH: Name of the file to display metadata for

    Examples:
      markitect metadata README.md
      markitect metadata docs/guide.md --format json
      markitect metadata config.md --format yaml
    """
    try:
        if config['verbose']:
            click.echo(f"Retrieving metadata for: {file_path}", err=True)

        file_info = _require_file_info(config, file_path)

        # Parse front matter for better display (left as text if unparseable)
        raw_front_matter = file_info.get('front_matter')
        if raw_front_matter and isinstance(raw_front_matter, str):
            try:
                file_info['front_matter'] = _parse_front_matter(raw_front_matter)
            except _FRONT_MATTER_ERRORS:
                if config['verbose']:
                    click.echo("Warning: Could not parse front matter", err=True)

        # Format and display metadata
        click.echo(format_output(file_info, format))

        if config['verbose']:
            content_length = len(file_info.get('content', ''))
            click.echo(f"Content length: {content_length} characters", err=True)

    except Exception as e:
        _fail(config, f"Metadata error: {e}")


@cli.command()
@pass_config
def list(config):
    """
    List all stored files and their status.

    Shows all markdown files that have been processed and stored
    in the MarkiTect database with their basic metadata.

    Examples:
      markitect list
      markitect --verbose list    # Show detailed information
    """
    try:
        if config['verbose']:
            click.echo("Retrieving all stored files...")

        files = config['db_manager'].list_markdown_files()

        if not files:
            click.echo("No files found in database.")
            click.echo("Use 'markitect ingest FILE_PATH' to add files.")
            return

        click.echo(f"Found {len(files)} file(s):")
        click.echo()

        for file_info in files:
            click.echo(f"📄 {file_info['filename']}")

            if config['verbose']:
                click.echo(f" Created: {file_info['created_at']}")
                if file_info.get('front_matter'):
                    try:
                        front_matter = _parse_front_matter(file_info['front_matter'])
                        if front_matter:
                            # Inside this module ``list`` is this click command,
                            # so the builtin must be referenced explicitly.
                            keys = builtins.list(front_matter.keys())
                            click.echo(f" Front matter: {keys}")
                    except _FRONT_MATTER_ERRORS:
                        click.echo(" Front matter: (parsing error)")
                click.echo()

    except Exception as e:
        _fail(config, f"Error listing files: {e}")


@cli.command('cache-info')
@pass_config
def cache_info(config):
    """
    Display cache statistics and effectiveness.

    Shows information about AST cache including directory path,
    total files cached, cache size, and performance metrics.
    """
    try:
        stats = CacheDirectoryService().get_cache_stats()

        click.echo(f"Cache Directory: {stats['directory']}")
        click.echo(f"Total Files: {stats['total_files']}")
        click.echo(f"Cache Size: {stats['size_formatted']}")

    except Exception as e:
        _fail(config, f"Cache info error: {e}")


@cli.command('cache-clean')
@pass_config
def cache_clean(config):
    """
    Clear cache and free memory.

    Removes all cached AST files from the cache directory
    to free up disk space and memory.
    """
    try:
        result = CacheDirectoryService().clean_cache()

        click.echo(result['message'])

        if not result['success']:
            # Report individual failures (if any) before signalling the error.
            for error in result.get('errors') or ():
                click.echo(f"Warning: {error}", err=True)
            sys.exit(1)

    except Exception as e:
        _fail(config, f"Cache clean error: {e}")


@cli.command('cache-invalidate')
@click.argument('file_path', type=str)
@pass_config
def cache_invalidate(config, file_path):
    """
    Invalidate specific file cache.

    Removes the cached AST for a specific markdown file,
    forcing it to be re-parsed on next access.

    Args:
        file_path: Path to the file whose cache should be invalidated
    """
    try:
        result = CacheDirectoryService().invalidate_file_cache(file_path)

        click.echo(result['message'])

        if not result['success']:
            sys.exit(1)

    except Exception as e:
        _fail(config, f"Cache invalidate error: {e}")


def main():
    """
    Main entry point for the CLI.

    This function is referenced in pyproject.toml console_scripts.
    """
    try:
        cli()
    except KeyboardInterrupt:
        click.echo("\nOperation interrupted by user.", err=True)
        sys.exit(130)  # Standard exit code for SIGINT
    except Exception as e:
        click.echo(f"Unexpected error: {e}", err=True)
        sys.exit(1)


if __name__ == '__main__':
    main()