CRITICAL MILESTONE: Establish schema-driven architecture foundation that unlocks the entire pathway to HolyGrailRequirement - intelligent arc42 architecture documentation with AI-supported plan-actual comparison capabilities. Major Components Implemented: 🎯 SCHEMA GENERATION SERVICE: • SchemaGenerator class with sophisticated AST analysis capabilities • Depth-limited heading extraction for arc42 section-specific schemas • Comprehensive structural element detection (headings, paragraphs, lists, code blocks, etc.) • JSON Schema Draft 7 compliant output with proper validation metadata • Robust error handling with domain-specific exceptions (FileNotFoundError, InvalidDepthError) 🖥️ CLI INTEGRATION: • generate-schema command with full argument and option support • Multiple output formats (JSON, YAML) with stdout or file output • Configurable depth limiting for architectural document analysis • User-friendly summaries and progress feedback • Integration with existing CLI framework and error handling patterns 📊 COMPREHENSIVE TESTING: • 6 comprehensive test scenarios covering core functionality and edge cases • Perfect integration with architectural test system (71 service layer tests passing) • Test coverage for schema generation, depth limiting, error handling, and JSON compliance • Architectural layer L4 (Service) test placement following reverse dependency principles 🏗️ STRATEGIC ARCHITECTURE: • Leverages existing AST processing infrastructure for maximum efficiency • Builds on proven markdown-it parsing with intelligent caching • Seamless integration with existing CLI framework and configuration system • Foundation for Issues #7 (Schema Validation) and #8 (Validation Errors) Technical Excellence: - Full JSON Schema Draft 7 specification compliance for validator compatibility - Sophisticated AST token analysis with structural pattern recognition - Configurable depth filtering essential for arc42 template compliance - Comprehensive metadata extraction for architectural analysis - Robust exception handling with actionable error messages Strategic Value: - 🎯 33% completion of critical path Phase 1 (Schema Foundation) - 🔑 Unlocks schema validation and error reporting capabilities - 🏛️ Essential building block for arc42 architectural documentation intelligence - 🚀 Direct pathway to AI-supported plan-actual comparison capabilities This implementation transforms MarkiTect from advanced markdown processor toward intelligent architecture documentation platform, establishing the schema-driven foundation critical for achieving the HolyGrailRequirement of arc42 compliance with AI intelligence. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1016 lines
36 KiB
Python
1016 lines
36 KiB
Python
"""
|
|
CLI Entry Point and Basic Commands - Issue #12
|
|
|
|
This module provides the command-line interface for MarkiTect, allowing users
|
|
to interact with core functionality through terminal commands.
|
|
|
|
Commands:
|
|
- ingest: Process and store a markdown file
|
|
- status: Show processing status and metadata for a file
|
|
- list: List all stored files and their status
|
|
|
|
Integration with existing components:
|
|
- Uses DatabaseManager for file storage and retrieval
|
|
- Uses DocumentManager for high-performance document processing
|
|
- Maintains performance caching architecture
|
|
"""
|
|
|
|
import click
|
|
import os
|
|
import sys
|
|
import json
|
|
import yaml
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from tabulate import tabulate
|
|
|
|
from .database import DatabaseManager
|
|
from .document_manager import DocumentManager
|
|
from .serializer import ASTSerializer
|
|
from .cache_service import CacheDirectoryService
|
|
from .ast_service import ASTService
|
|
from .schema_generator import SchemaGenerator
|
|
from .exceptions import FileNotFoundError, InvalidDepthError
|
|
|
|
|
|
# Global options for CLI configuration
|
|
pass_config = click.make_pass_decorator(dict, ensure=True)
|
|
|
|
|
|
def format_output(data, output_format):
|
|
"""
|
|
Format data according to specified output format.
|
|
|
|
Args:
|
|
data: Data to format
|
|
output_format: Format type ('table', 'json', 'yaml')
|
|
|
|
Returns:
|
|
Formatted string output
|
|
"""
|
|
if output_format == 'json':
|
|
return json.dumps(data, indent=2, default=str)
|
|
elif output_format == 'yaml':
|
|
return yaml.dump(data, default_flow_style=False, allow_unicode=True)
|
|
elif output_format == 'table':
|
|
try:
|
|
# Check if it's a list type
|
|
if isinstance(data, (type([]), type(()))):
|
|
if data and isinstance(data[0], dict):
|
|
# List of dictionaries - format as table
|
|
headers = sorted(data[0].keys())
|
|
rows = []
|
|
for item in data:
|
|
row = []
|
|
for header in headers:
|
|
row.append(item.get(header, ''))
|
|
rows.append(row)
|
|
return tabulate(rows, headers=headers, tablefmt='grid')
|
|
else:
|
|
# List of simple values
|
|
return tabulate([[item] for item in data], headers=['Value'], tablefmt='grid')
|
|
elif isinstance(data, dict):
|
|
# Single dictionary - format as key-value table
|
|
rows = [[key, value] for key, value in data.items()]
|
|
return tabulate(rows, headers=['Key', 'Value'], tablefmt='grid')
|
|
else:
|
|
# Fallback to string representation
|
|
return str(data)
|
|
except Exception as e:
|
|
# Fallback to string if table formatting fails
|
|
return f"Table formatting error: {e}\nData: {str(data)}"
|
|
else:
|
|
# Default to table format
|
|
return format_output(data, 'table')
|
|
|
|
|
|
@click.group()
|
|
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
|
|
@click.option('--config', 'config_file', type=click.Path(exists=True), help='Configuration file path')
|
|
@click.option('--database', type=click.Path(), help='Database file path')
|
|
@pass_config
|
|
def cli(config, verbose, database, config_file):
|
|
"""
|
|
MarkiTect - Advanced Markdown engine for structured content.
|
|
|
|
Process markdown files with front matter support, AST caching,
|
|
and relational metadata queries.
|
|
|
|
Examples:
|
|
markitect ingest document.md # Process a markdown file
|
|
markitect status document.md # Check file status
|
|
markitect list # List all stored files
|
|
"""
|
|
# Store configuration in context
|
|
config['verbose'] = verbose
|
|
config['config_file'] = config_file
|
|
|
|
# Determine database path
|
|
if database:
|
|
config['database_path'] = database
|
|
else:
|
|
# Default database location
|
|
config['database_path'] = os.path.expanduser('~/.markitect/markitect.db')
|
|
|
|
# Initialize database manager and ensure database exists
|
|
try:
|
|
db_manager = DatabaseManager(config['database_path'])
|
|
db_manager.initialize_database()
|
|
config['db_manager'] = db_manager
|
|
|
|
if verbose:
|
|
click.echo(f"Using database: {config['database_path']}", err=True)
|
|
except Exception as e:
|
|
click.echo(f"Error initializing database: {e}", err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@click.argument('file_path', type=click.Path(exists=True))
|
|
@pass_config
|
|
def ingest(config, file_path):
|
|
"""
|
|
Process and store a markdown file.
|
|
|
|
Ingests a markdown file into the MarkiTect system, parsing its content,
|
|
extracting front matter, generating AST cache, and storing metadata
|
|
in the database.
|
|
|
|
FILE_PATH: Path to the markdown file to process
|
|
|
|
Examples:
|
|
markitect ingest README.md
|
|
markitect ingest docs/guide.md
|
|
"""
|
|
try:
|
|
file_path = Path(file_path)
|
|
|
|
if config['verbose']:
|
|
click.echo(f"Processing file: {file_path}")
|
|
|
|
# Initialize document manager with database manager
|
|
doc_manager = DocumentManager(config['db_manager'])
|
|
|
|
# Ingest the file
|
|
result = doc_manager.ingest_file(file_path)
|
|
|
|
if config['verbose']:
|
|
click.echo(f"Processing results:")
|
|
click.echo(f" File: {result['metadata']['filename']}")
|
|
click.echo(f" AST nodes: {len(result['ast'])} nodes")
|
|
click.echo(f" Cache file: {result['ast_cache_path']}")
|
|
click.echo(f" Parse time: {result['parse_time']:.2f}s")
|
|
click.echo(f" Cache time: {result['cache_time']:.2f}s")
|
|
|
|
click.echo(f"✓ Successfully ingested: {file_path.name}")
|
|
|
|
except FileNotFoundError:
|
|
click.echo(f"Error: File not found: {file_path}", err=True)
|
|
sys.exit(1)
|
|
except PermissionError:
|
|
click.echo(f"Error: Permission denied accessing: {file_path}", err=True)
|
|
sys.exit(1)
|
|
except Exception as e:
|
|
click.echo(f"Error processing file: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@click.argument('file_path', type=str)
|
|
@pass_config
|
|
def status(config, file_path):
|
|
"""
|
|
Show processing status and metadata for a file.
|
|
|
|
Displays information about a file's processing status, metadata,
|
|
and front matter content from the database.
|
|
|
|
FILE_PATH: Path or name of the file to check
|
|
|
|
Examples:
|
|
markitect status README.md
|
|
markitect status docs/guide.md
|
|
"""
|
|
try:
|
|
if config['verbose']:
|
|
click.echo(f"Checking status for: {file_path}")
|
|
|
|
# Get file information from database
|
|
db_manager = config['db_manager']
|
|
file_info = db_manager.get_markdown_file(file_path)
|
|
|
|
if file_info:
|
|
click.echo(f"File: {file_info['filename']}")
|
|
click.echo(f"Status: Processed")
|
|
click.echo(f"Created: {file_info['created_at']}")
|
|
|
|
if file_info['front_matter']:
|
|
try:
|
|
front_matter = eval(file_info['front_matter']) # Safe for our controlled data
|
|
if front_matter:
|
|
click.echo("Front Matter:")
|
|
for key, value in front_matter.items():
|
|
click.echo(f" {key}: {value}")
|
|
except (ValueError, TypeError, SyntaxError):
|
|
click.echo("Front Matter: (parsing error)")
|
|
elif file_info['front_matter'] is None:
|
|
pass # No front matter to display
|
|
|
|
if config['verbose']:
|
|
content_preview = file_info['content'][:200] + "..." if len(file_info['content']) > 200 else file_info['content']
|
|
click.echo(f"Content preview: {content_preview}")
|
|
else:
|
|
click.echo(f"File not found in database: {file_path}")
|
|
click.echo("Use 'markitect ingest' to process the file first.")
|
|
sys.exit(1)
|
|
|
|
except Exception as e:
|
|
click.echo(f"Error checking file status: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@click.argument('file_path', type=str)
|
|
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: stdout)')
|
|
@pass_config
|
|
def get(config, file_path, output):
|
|
"""
|
|
Retrieve and output a processed markdown file.
|
|
|
|
Loads the file from the database and AST cache, then serializes it back
|
|
to markdown format. Supports outputting to file or stdout.
|
|
|
|
FILE_PATH: Name of the file to retrieve
|
|
|
|
Examples:
|
|
markitect get README.md
|
|
markitect get docs/guide.md --output modified_guide.md
|
|
"""
|
|
try:
|
|
if config['verbose']:
|
|
click.echo(f"Retrieving file: {file_path}")
|
|
|
|
db_manager = config['db_manager']
|
|
|
|
# Get file information from database
|
|
file_info = db_manager.get_markdown_file(file_path)
|
|
if not file_info:
|
|
click.echo(f"File not found in database: {file_path}", err=True)
|
|
click.echo("Use 'markitect ingest' to process the file first.", err=True)
|
|
sys.exit(1)
|
|
|
|
# Load AST from cache
|
|
cache_filename = f"{file_path}.ast.json"
|
|
cache_path = Path('.ast_cache') / cache_filename
|
|
|
|
if not cache_path.exists():
|
|
click.echo(f"AST cache not found: {cache_path}", err=True)
|
|
click.echo("Try re-ingesting the file to regenerate cache.", err=True)
|
|
sys.exit(1)
|
|
|
|
# Read AST from cache
|
|
with open(cache_path, 'r', encoding='utf-8') as f:
|
|
ast = json.load(f)
|
|
|
|
# Parse front matter from database
|
|
front_matter = None
|
|
if file_info.get('front_matter'):
|
|
try:
|
|
front_matter = eval(file_info['front_matter'])
|
|
except (ValueError, TypeError, SyntaxError):
|
|
if config['verbose']:
|
|
click.echo("Warning: Could not parse front matter", err=True)
|
|
|
|
# Serialize AST back to markdown
|
|
serializer = ASTSerializer()
|
|
markdown_content = serializer.serialize_to_markdown(ast, front_matter)
|
|
|
|
# Output to file or stdout
|
|
if output:
|
|
output_path = Path(output)
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
f.write(markdown_content)
|
|
click.echo(f"✓ File written to: {output_path}")
|
|
else:
|
|
click.echo(markdown_content)
|
|
|
|
if config['verbose']:
|
|
click.echo(f"Retrieved {len(ast)} AST tokens", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"Error retrieving file: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@click.argument('file_path', type=str)
|
|
@click.option('--add-section', type=str, help='Add section with title')
|
|
@click.option('--section-content', type=str, default='', help='Content for new section')
|
|
@click.option('--section-level', type=int, default=2, help='Heading level for new section (1-6)')
|
|
@click.option('--update-front-matter', type=str, help='Update front matter (format: key:value)')
|
|
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: overwrite original in cache)')
|
|
@pass_config
|
|
def modify(config, file_path, add_section, section_content, section_level, update_front_matter, output):
|
|
"""
|
|
Modify the content of a processed markdown file.
|
|
|
|
Loads the file from cache, applies modifications, and updates the cache
|
|
or outputs to a new file. Supports adding sections and updating front matter.
|
|
|
|
FILE_PATH: Name of the file to modify
|
|
|
|
Examples:
|
|
markitect modify README.md --add-section "New Section" --section-content "New content"
|
|
markitect modify doc.md --update-front-matter "status:updated"
|
|
markitect modify doc.md --add-section "Notes" --output modified_doc.md
|
|
"""
|
|
try:
|
|
if config['verbose']:
|
|
click.echo(f"Modifying file: {file_path}")
|
|
|
|
db_manager = config['db_manager']
|
|
|
|
# Get file information from database
|
|
file_info = db_manager.get_markdown_file(file_path)
|
|
if not file_info:
|
|
click.echo(f"File not found in database: {file_path}", err=True)
|
|
click.echo("Use 'markitect ingest' to process the file first.", err=True)
|
|
sys.exit(1)
|
|
|
|
# Load AST from cache
|
|
cache_filename = f"{file_path}.ast.json"
|
|
cache_path = Path('.ast_cache') / cache_filename
|
|
|
|
if not cache_path.exists():
|
|
click.echo(f"AST cache not found: {cache_path}", err=True)
|
|
click.echo("Try re-ingesting the file to regenerate cache.", err=True)
|
|
sys.exit(1)
|
|
|
|
# Read AST from cache
|
|
with open(cache_path, 'r', encoding='utf-8') as f:
|
|
ast = json.load(f)
|
|
|
|
# Parse front matter from database
|
|
front_matter = {}
|
|
if file_info.get('front_matter'):
|
|
try:
|
|
front_matter = eval(file_info['front_matter']) or {}
|
|
except (ValueError, TypeError, SyntaxError):
|
|
if config['verbose']:
|
|
click.echo("Warning: Could not parse existing front matter", err=True)
|
|
|
|
# Prepare modifications
|
|
modifications = {}
|
|
changes_made = []
|
|
|
|
# Handle add-section modification
|
|
if add_section:
|
|
modifications['add_section'] = {
|
|
'title': add_section,
|
|
'content': section_content,
|
|
'level': section_level
|
|
}
|
|
changes_made.append(f"Added section: {add_section}")
|
|
|
|
# Handle front matter updates
|
|
if update_front_matter:
|
|
try:
|
|
if ':' in update_front_matter:
|
|
key, value = update_front_matter.split(':', 1)
|
|
key = key.strip()
|
|
value = value.strip()
|
|
|
|
# Try to parse value as appropriate type
|
|
if value.lower() in ['true', 'false']:
|
|
value = value.lower() == 'true'
|
|
elif value.isdigit():
|
|
value = int(value)
|
|
elif value.replace('.', '').isdigit():
|
|
value = float(value)
|
|
|
|
front_matter[key] = value
|
|
changes_made.append(f"Updated front matter: {key} = {value}")
|
|
else:
|
|
click.echo("Invalid front matter format. Use 'key:value'", err=True)
|
|
sys.exit(1)
|
|
except ValueError as e:
|
|
click.echo(f"Error parsing front matter update: {e}", err=True)
|
|
sys.exit(1)
|
|
|
|
if not changes_made:
|
|
click.echo("No modifications specified. Use --add-section or --update-front-matter", err=True)
|
|
sys.exit(1)
|
|
|
|
# Apply modifications to AST
|
|
serializer = ASTSerializer()
|
|
if modifications:
|
|
ast = serializer.modify_ast_content(ast, modifications)
|
|
|
|
# Serialize back to markdown
|
|
markdown_content = serializer.serialize_to_markdown(ast, front_matter)
|
|
|
|
# Handle output
|
|
if output:
|
|
# Write to specified output file
|
|
output_path = Path(output)
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
f.write(markdown_content)
|
|
click.echo(f"✓ Modified file written to: {output_path}")
|
|
else:
|
|
# Update the cache and database with modifications
|
|
with open(cache_path, 'w', encoding='utf-8') as f:
|
|
json.dump(ast, f, indent=2, ensure_ascii=False)
|
|
|
|
# Update database with new front matter
|
|
if front_matter:
|
|
# Note: This would require extending DatabaseManager to update front matter
|
|
# For now, we'll just note the modification
|
|
if config['verbose']:
|
|
click.echo("Note: Database front matter update not implemented yet", err=True)
|
|
|
|
click.echo(f"✓ Modified file updated in cache: {file_path}")
|
|
|
|
# Show changes made
|
|
if config['verbose']:
|
|
click.echo("Changes applied:", err=True)
|
|
for change in changes_made:
|
|
click.echo(f" - {change}", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"Error modifying file: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@click.argument('sql', type=str)
|
|
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
|
|
@pass_config
|
|
def query(config, sql, format):
|
|
"""
|
|
Execute SQL query against the database.
|
|
|
|
Execute read-only SQL queries to explore and analyze document metadata.
|
|
Only SELECT and WITH statements are allowed for security.
|
|
|
|
SQL: SQL query to execute (SELECT statements only)
|
|
|
|
Examples:
|
|
markitect query "SELECT filename, created_at FROM markdown_files"
|
|
markitect query "SELECT COUNT(*) as total FROM markdown_files" --format json
|
|
markitect query "SELECT * FROM markdown_files WHERE filename LIKE '%.md'" --format yaml
|
|
"""
|
|
try:
|
|
if config['verbose']:
|
|
click.echo(f"Executing query: {sql}", err=True)
|
|
|
|
db_manager = config['db_manager']
|
|
|
|
# Execute the query
|
|
results = db_manager.execute_query(sql)
|
|
|
|
if not results:
|
|
if format == 'json':
|
|
click.echo('[]')
|
|
elif format == 'yaml':
|
|
click.echo('[]')
|
|
else:
|
|
click.echo("No results found.")
|
|
return
|
|
|
|
# Format and display results
|
|
formatted_output = format_output(results, format)
|
|
click.echo(formatted_output)
|
|
|
|
if config['verbose']:
|
|
click.echo(f"Query returned {len(results)} result(s)", err=True)
|
|
|
|
except ValueError as e:
|
|
click.echo(f"Query error: {e}", err=True)
|
|
sys.exit(1)
|
|
except Exception as e:
|
|
click.echo(f"Database error: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
|
|
@pass_config
|
|
def schema(config, format):
|
|
"""
|
|
Show database schema and table structure.
|
|
|
|
Display the structure of all tables in the database, including
|
|
column names, types, and constraints.
|
|
|
|
Examples:
|
|
markitect schema
|
|
markitect schema --format json
|
|
markitect schema --format yaml
|
|
"""
|
|
try:
|
|
if config['verbose']:
|
|
click.echo("Retrieving database schema...", err=True)
|
|
|
|
db_manager = config['db_manager']
|
|
|
|
# Get schema information
|
|
schema_info = db_manager.get_schema()
|
|
|
|
if not schema_info:
|
|
click.echo("No tables found in database.")
|
|
return
|
|
|
|
# Format and display schema
|
|
formatted_output = format_output(schema_info, format)
|
|
click.echo(formatted_output)
|
|
|
|
if config['verbose']:
|
|
table_count = len(schema_info)
|
|
click.echo(f"Schema contains {table_count} table(s)", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"Schema error: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@click.argument('file_path', type=str)
|
|
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
|
|
@pass_config
|
|
def metadata(config, file_path, format):
|
|
"""
|
|
Display file metadata and front matter.
|
|
|
|
Show detailed information about a specific file including its
|
|
front matter, database metadata, and processing information.
|
|
|
|
FILE_PATH: Name of the file to display metadata for
|
|
|
|
Examples:
|
|
markitect metadata README.md
|
|
markitect metadata docs/guide.md --format json
|
|
markitect metadata config.md --format yaml
|
|
"""
|
|
try:
|
|
if config['verbose']:
|
|
click.echo(f"Retrieving metadata for: {file_path}", err=True)
|
|
|
|
db_manager = config['db_manager']
|
|
|
|
# Get file information from database
|
|
file_info = db_manager.get_markdown_file(file_path)
|
|
|
|
if not file_info:
|
|
click.echo(f"File not found in database: {file_path}", err=True)
|
|
click.echo("Use 'markitect ingest' to process the file first.", err=True)
|
|
sys.exit(1)
|
|
|
|
# Parse front matter for better display
|
|
if file_info.get('front_matter'):
|
|
try:
|
|
if isinstance(file_info['front_matter'], str):
|
|
file_info['front_matter'] = eval(file_info['front_matter'])
|
|
except (ValueError, TypeError, SyntaxError):
|
|
if config['verbose']:
|
|
click.echo("Warning: Could not parse front matter", err=True)
|
|
|
|
# Format and display metadata
|
|
formatted_output = format_output(file_info, format)
|
|
click.echo(formatted_output)
|
|
|
|
if config['verbose']:
|
|
content_length = len(file_info.get('content', ''))
|
|
click.echo(f"Content length: {content_length} characters", err=True)
|
|
|
|
except Exception as e:
|
|
click.echo(f"Metadata error: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command()
|
|
@pass_config
|
|
def list(config):
|
|
"""
|
|
List all stored files and their status.
|
|
|
|
Shows all markdown files that have been processed and stored
|
|
in the MarkiTect database with their basic metadata.
|
|
|
|
Examples:
|
|
markitect list
|
|
markitect --verbose list # Show detailed information
|
|
"""
|
|
try:
|
|
if config['verbose']:
|
|
click.echo("Retrieving all stored files...")
|
|
|
|
db_manager = config['db_manager']
|
|
files = db_manager.list_markdown_files()
|
|
|
|
if not files:
|
|
click.echo("No files found in database.")
|
|
click.echo("Use 'markitect ingest <file>' to add files.")
|
|
return
|
|
|
|
click.echo(f"Found {len(files)} file(s):")
|
|
click.echo()
|
|
|
|
for file_info in files:
|
|
click.echo(f"📄 {file_info['filename']}")
|
|
if config['verbose']:
|
|
click.echo(f" Created: {file_info['created_at']}")
|
|
if file_info.get('front_matter'):
|
|
try:
|
|
front_matter = eval(file_info['front_matter'])
|
|
if front_matter:
|
|
click.echo(f" Front matter: {list(front_matter.keys())}")
|
|
except (ValueError, TypeError, SyntaxError):
|
|
click.echo(f" Front matter: (parsing error)")
|
|
click.echo()
|
|
|
|
except Exception as e:
|
|
click.echo(f"Error listing files: {e}", err=True)
|
|
if config['verbose']:
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command('cache-info')
|
|
@pass_config
|
|
def cache_info(config):
|
|
"""
|
|
Display cache statistics and effectiveness.
|
|
|
|
Shows information about AST cache including directory path,
|
|
total files cached, cache size, and performance metrics.
|
|
"""
|
|
try:
|
|
cache_service = CacheDirectoryService()
|
|
stats = cache_service.get_cache_stats()
|
|
|
|
click.echo(f"Cache Directory: {stats['directory']}")
|
|
click.echo(f"Total Files: {stats['total_files']}")
|
|
click.echo(f"Cache Size: {stats['size_formatted']}")
|
|
|
|
except Exception as e:
|
|
click.echo(f"Cache info error: {e}", err=True)
|
|
if config and config.get('verbose'):
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command('cache-clean')
|
|
@pass_config
|
|
def cache_clean(config):
|
|
"""
|
|
Clear cache and free memory.
|
|
|
|
Removes all cached AST files from the cache directory
|
|
to free up disk space and memory.
|
|
"""
|
|
try:
|
|
cache_service = CacheDirectoryService()
|
|
result = cache_service.clean_cache()
|
|
|
|
click.echo(result['message'])
|
|
|
|
if not result['success'] and result.get('errors'):
|
|
for error in result['errors']:
|
|
click.echo(f"Warning: {error}", err=True)
|
|
|
|
if not result['success']:
|
|
sys.exit(1)
|
|
|
|
except Exception as e:
|
|
click.echo(f"Cache clean error: {e}", err=True)
|
|
if config and config.get('verbose'):
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command('cache-invalidate')
|
|
@click.argument('file_path', type=str)
|
|
@pass_config
|
|
def cache_invalidate(config, file_path):
|
|
"""
|
|
Invalidate specific file cache.
|
|
|
|
Removes the cached AST for a specific markdown file,
|
|
forcing it to be re-parsed on next access.
|
|
|
|
Args:
|
|
file_path: Path to the file whose cache should be invalidated
|
|
"""
|
|
try:
|
|
cache_service = CacheDirectoryService()
|
|
result = cache_service.invalidate_file_cache(file_path)
|
|
|
|
click.echo(result['message'])
|
|
|
|
if not result['success']:
|
|
sys.exit(1)
|
|
|
|
except Exception as e:
|
|
click.echo(f"Cache invalidate error: {e}", err=True)
|
|
if config and config.get('verbose'):
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command('ast-show')
|
|
@click.argument('file_path', type=click.Path(exists=False))
|
|
@click.option('--format', '-f', type=click.Choice(['tree', 'json', 'compact']), default='tree', help='Display format')
|
|
@pass_config
|
|
def ast_show(config, file_path, format):
|
|
"""
|
|
Display AST structure for file.
|
|
|
|
Shows the Abstract Syntax Tree representation of a markdown file
|
|
with various formatting options for analysis and debugging.
|
|
|
|
FILE_PATH: Path to the markdown file to analyze
|
|
|
|
Examples:
|
|
markitect ast-show document.md
|
|
markitect ast-show document.md --format json
|
|
markitect ast-show document.md --format compact
|
|
"""
|
|
try:
|
|
if config.get('verbose'):
|
|
click.echo(f"Analyzing AST structure for: {file_path}", err=True)
|
|
|
|
ast_service = ASTService()
|
|
result = ast_service.display_ast(Path(file_path), format)
|
|
|
|
if result['success']:
|
|
if result.get('message'):
|
|
if config.get('verbose'):
|
|
click.echo(f"Info: {result['message']}", err=True)
|
|
click.echo(result['output'])
|
|
|
|
if config.get('verbose') and result.get('token_count'):
|
|
click.echo(f"Total tokens: {result['token_count']}", err=True)
|
|
else:
|
|
click.echo(f"Error: {result['message']}", err=True)
|
|
sys.exit(1)
|
|
|
|
except Exception as e:
|
|
click.echo(f"AST display error: {e}", err=True)
|
|
if config and config.get('verbose'):
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command('ast-query')
|
|
@click.argument('file_path', type=click.Path(exists=False))
|
|
@click.argument('jsonpath', type=str)
|
|
@click.option('--format', '-f', type=click.Choice(['json', 'compact']), default='json', help='Output format')
|
|
@pass_config
|
|
def ast_query(config, file_path, jsonpath, format):
|
|
"""
|
|
Query AST using JSONPath.
|
|
|
|
Execute JSONPath expressions against the AST structure of a markdown file
|
|
to extract specific elements or patterns.
|
|
|
|
FILE_PATH: Path to the markdown file to query
|
|
JSONPATH: JSONPath expression to execute
|
|
|
|
Examples:
|
|
markitect ast-query doc.md '$.*.type'
|
|
markitect ast-query doc.md '$..tag'
|
|
markitect ast-query doc.md '$[:5]' --format compact
|
|
"""
|
|
try:
|
|
if config.get('verbose'):
|
|
click.echo(f"Executing JSONPath query on: {file_path}", err=True)
|
|
click.echo(f"Query: {jsonpath}", err=True)
|
|
|
|
ast_service = ASTService()
|
|
result = ast_service.query_ast(Path(file_path), jsonpath)
|
|
|
|
if result['success']:
|
|
if config.get('verbose'):
|
|
click.echo(f"Query results: {result['count']} matches", err=True)
|
|
|
|
if result['count'] == 0:
|
|
click.echo("No matches found for query.")
|
|
else:
|
|
if format == 'compact':
|
|
for i, match in enumerate(result['matches']):
|
|
if isinstance(match, dict):
|
|
token_type = match.get('type', 'unknown')
|
|
content = match.get('content', match.get('tag', ''))[:30]
|
|
click.echo(f"[{i}] {token_type}: {content}")
|
|
else:
|
|
click.echo(f"[{i}] {match}")
|
|
else:
|
|
import json
|
|
click.echo(json.dumps(result['matches'], indent=2, ensure_ascii=False))
|
|
else:
|
|
click.echo(f"Error: {result['message']}", err=True)
|
|
sys.exit(1)
|
|
|
|
except Exception as e:
|
|
click.echo(f"AST query error: {e}", err=True)
|
|
if config and config.get('verbose'):
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command('ast-stats')
|
|
@click.argument('file_path', type=click.Path(exists=False))
|
|
@click.option('--format', '-f', type=click.Choice(['table', 'json', 'yaml']), default='table', help='Output format')
|
|
@pass_config
|
|
def ast_stats(config, file_path, format):
|
|
"""
|
|
Show AST statistics (headings, links, etc.).
|
|
|
|
Analyze markdown file structure and provide comprehensive statistics
|
|
about document elements, organization, and content patterns.
|
|
|
|
FILE_PATH: Path to the markdown file to analyze
|
|
|
|
Examples:
|
|
markitect ast-stats document.md
|
|
markitect ast-stats document.md --format json
|
|
markitect ast-stats document.md --format yaml
|
|
"""
|
|
try:
|
|
if config.get('verbose'):
|
|
click.echo(f"Calculating statistics for: {file_path}", err=True)
|
|
|
|
ast_service = ASTService()
|
|
result = ast_service.analyze_ast_statistics(Path(file_path))
|
|
|
|
if result['success']:
|
|
if config.get('verbose'):
|
|
click.echo(f"Analysis complete for: {Path(file_path).name}", err=True)
|
|
|
|
stats = result['statistics']
|
|
if format == 'table':
|
|
# Format statistics as readable table
|
|
click.echo("Document Statistics:")
|
|
click.echo("=" * 40)
|
|
click.echo(f"Total AST tokens: {stats.get('total_tokens', 0)}")
|
|
click.echo(f"Document structure: {stats.get('document_structure', 'unknown')}")
|
|
click.echo()
|
|
|
|
# Headings
|
|
headings = stats.get('headings', {})
|
|
click.echo(f"Headings: {headings.get('total', 0)}")
|
|
for level, count in headings.get('by_level', {}).items():
|
|
click.echo(f" {level.upper()}: {count}")
|
|
|
|
click.echo(f"Paragraphs: {stats.get('paragraphs', 0)}")
|
|
click.echo(f"Links: {stats.get('links', 0)}")
|
|
|
|
# Lists
|
|
lists = stats.get('lists', {})
|
|
total_lists = lists.get('ordered', 0) + lists.get('unordered', 0)
|
|
click.echo(f"Lists: {total_lists}")
|
|
if total_lists > 0:
|
|
click.echo(f" Ordered: {lists.get('ordered', 0)}")
|
|
click.echo(f" Unordered: {lists.get('unordered', 0)}")
|
|
|
|
click.echo(f"Code blocks: {stats.get('code_blocks', 0)}")
|
|
click.echo(f"Inline code: {stats.get('inline_code', 0)}")
|
|
click.echo(f"Blockquotes: {stats.get('blockquotes', 0)}")
|
|
|
|
# Emphasis
|
|
emphasis = stats.get('emphasis', {})
|
|
click.echo(f"Strong text: {emphasis.get('strong', 0)}")
|
|
click.echo(f"Italic text: {emphasis.get('italic', 0)}")
|
|
|
|
elif format == 'json':
|
|
import json
|
|
click.echo(json.dumps(stats, indent=2, ensure_ascii=False))
|
|
elif format == 'yaml':
|
|
import yaml
|
|
click.echo(yaml.dump(stats, default_flow_style=False, allow_unicode=True))
|
|
|
|
else:
|
|
click.echo(f"Error: {result['message']}", err=True)
|
|
sys.exit(1)
|
|
|
|
except Exception as e:
|
|
click.echo(f"AST statistics error: {e}", err=True)
|
|
if config and config.get('verbose'):
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
@cli.command('generate-schema')
|
|
@click.argument('file_path', type=click.Path(exists=True, path_type=Path))
|
|
@click.option('--max-depth', '-d', type=int, help='Maximum heading depth to include in schema')
|
|
@click.option('--output', '-o', type=click.Path(path_type=Path), help='Output file path (default: stdout)')
|
|
@click.option('--format', 'output_format', type=click.Choice(['json', 'yaml']), default='json', help='Output format')
|
|
@pass_config
|
|
def generate_schema(config, file_path, max_depth, output, output_format):
|
|
"""
|
|
Generate a JSON schema from a markdown file's AST structure.
|
|
|
|
FILE_PATH: Path to the markdown file to analyze
|
|
|
|
Example:
|
|
markitect generate-schema document.md
|
|
markitect generate-schema document.md --max-depth 2
|
|
markitect generate-schema document.md --output schema.json
|
|
"""
|
|
try:
|
|
# Initialize schema generator
|
|
generator = SchemaGenerator()
|
|
|
|
# Generate schema
|
|
schema = generator.generate_schema_from_file(file_path, max_depth=max_depth)
|
|
|
|
# Format output
|
|
if output_format == 'json':
|
|
formatted_output = json.dumps(schema, indent=2, ensure_ascii=False)
|
|
elif output_format == 'yaml':
|
|
formatted_output = yaml.dump(schema, default_flow_style=False, allow_unicode=True)
|
|
else:
|
|
formatted_output = json.dumps(schema, indent=2, ensure_ascii=False)
|
|
|
|
# Write to output
|
|
if output:
|
|
output.write_text(formatted_output, encoding='utf-8')
|
|
click.echo(f"Schema written to: {output}")
|
|
|
|
# Show summary
|
|
properties = schema.get('properties', {})
|
|
click.echo(f"Generated schema with {len(properties)} property types")
|
|
|
|
if 'headings' in properties:
|
|
heading_levels = len(properties['headings'].get('properties', {}))
|
|
click.echo(f" - {heading_levels} heading levels found")
|
|
|
|
structural_elements = ['paragraphs', 'lists', 'code_blocks', 'blockquotes', 'tables']
|
|
found_elements = [elem for elem in structural_elements if elem in properties]
|
|
if found_elements:
|
|
click.echo(f" - Structural elements: {', '.join(found_elements)}")
|
|
else:
|
|
click.echo(formatted_output)
|
|
|
|
except FileNotFoundError as e:
|
|
click.echo(f"File not found: {e}", err=True)
|
|
sys.exit(1)
|
|
except InvalidDepthError as e:
|
|
click.echo(f"Invalid depth parameter: {e}", err=True)
|
|
sys.exit(1)
|
|
except Exception as e:
|
|
click.echo(f"Schema generation error: {e}", err=True)
|
|
if config and config.get('verbose'):
|
|
import traceback
|
|
click.echo(traceback.format_exc(), err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
def main():
|
|
"""
|
|
Main entry point for the CLI.
|
|
|
|
This function is referenced in pyproject.toml console_scripts.
|
|
"""
|
|
try:
|
|
cli()
|
|
except KeyboardInterrupt:
|
|
click.echo("\nOperation interrupted by user.", err=True)
|
|
sys.exit(130) # Standard exit code for SIGINT
|
|
except Exception as e:
|
|
click.echo(f"Unexpected error: {e}", err=True)
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main() |