# ---------------------------------------------------------------------------
# markitect/cli.py — new command, added after `schema_ingest` and before the
# 'schema-list' command.
# ---------------------------------------------------------------------------


@cli.command('schema-auto-ingest')
@pass_config
def schema_auto_ingest(config):
    """
    Automatically ingest all schemas from markitect/schemas/ directory.

    Scans the schemas directory for .md schema files and ingests any that
    are not already in the database. Skips schemas that have already been
    ingested.

    This command is useful for:
    - Post-install setup to register bundled schemas
    - Development workflow to sync schema changes
    - Updating schema registry after package updates

    Exits with status 1 if any schema fails to ingest or an unexpected
    error occurs.

    Examples:
        markitect schema-auto-ingest
    """
    try:
        from .schema_loader import auto_ingest_schemas
        from .database import DatabaseManager

        # Initialize database (fall back to the per-user default location).
        db_path = config.get('database_path') or str(Path.home() / '.markitect' / 'markitect.db')
        db_manager = DatabaseManager(db_path)
        db_manager.initialize_database()

        verbose = config.get('verbose', False)

        # Run auto-ingestion
        results = auto_ingest_schemas(db_manager=db_manager, verbose=verbose)

        # In verbose mode auto_ingest_schemas() already printed per-file
        # progress and its own summary, so only summarise here otherwise.
        if not verbose:
            if results['ingested']:
                click.echo(f"✅ Ingested {len(results['ingested'])} schema(s)")
                for schema_name in results['ingested']:
                    click.echo(f" - {schema_name}")

            if results['skipped']:
                click.echo(f"⏭️ Skipped {len(results['skipped'])} already-ingested schema(s)")

            if results['failed']:
                click.echo(f"❌ Failed to ingest {len(results['failed'])} schema(s):")
                for schema_name, error in results['failed']:
                    click.echo(f" - {schema_name}: {error}")

            if not results['ingested'] and not results['failed']:
                if not results['skipped']:
                    click.echo("ℹ️ No schemas found to ingest")
                else:
                    click.echo("✅ All schemas already ingested")

        # Surface partial failure to callers (scripts, install hooks) through
        # the exit status. SystemExit is not an Exception subclass, so the
        # handler below does not intercept it.
        if results['failed']:
            sys.exit(1)

    except Exception as e:
        click.echo(f"Auto-ingest error: {e}", err=True)
        if config and config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)


# ---------------------------------------------------------------------------
# markitect/schema_loader.py — new function, appended at the end of the module.
# ---------------------------------------------------------------------------


def auto_ingest_schemas(db_manager=None, schema_dir: Optional[Path] = None, verbose: bool = False) -> Dict[str, Any]:
    """Automatically ingest schemas from markitect/schemas/ directory.

    This function scans the schemas directory for .md schema files and ingests
    any that are not already in the database. Useful for post-install setup
    or automatic schema registration.

    Args:
        db_manager: DatabaseManager instance (optional, will create if not provided)
        schema_dir: Directory containing schemas (defaults to markitect/schemas/).
            A plain string path is accepted as well as a ``Path``.
        verbose: If True, print detailed progress messages

    Returns:
        Dictionary with ingestion results:
        {
            'ingested': [list of schema names that were ingested],
            'skipped': [list of schema names that were already present],
            'failed': [list of (schema_name, error) tuples for failures]
        }
        If the existing schemas cannot be listed from the database, nothing
        is ingested and 'failed' holds a single ('<database>', error) entry.

    Example:
        >>> from markitect.schema_loader import auto_ingest_schemas
        >>> results = auto_ingest_schemas(verbose=True)
        >>> print(f"Ingested {len(results['ingested'])} schemas")
    """
    results: Dict[str, Any] = {
        'ingested': [],
        'skipped': [],
        'failed': [],
    }

    # Determine schema directory; coerce so callers may pass a str path.
    if schema_dir is None:
        schema_dir = Path(__file__).parent / "schemas"
    else:
        schema_dir = Path(schema_dir)

    # is_dir() rather than exists(): a regular file is not a schema directory.
    if not schema_dir.is_dir():
        if verbose:
            print(f"⚠️ Schema directory not found: {schema_dir}")
        return results

    # Initialize database manager if not provided
    if db_manager is None:
        from .database import DatabaseManager
        db_path = Path.home() / '.markitect' / 'markitect.db'
        db_manager = DatabaseManager(str(db_path))
        db_manager.initialize_database()

    # Get the names of already ingested schemas. A failure here is reported
    # through 'failed' so callers do not mistake it for "nothing to ingest".
    try:
        existing_schemas = {schema['name'] for schema in db_manager.list_schemas()}
    except Exception as e:
        message = f"Error listing existing schemas: {e}"
        results['failed'].append(('<database>', message))
        if verbose:
            print(f"❌ {message}")
        return results

    # Find all schema files, in a deterministic (sorted) order.
    schema_files = sorted(schema_dir.glob("*-schema-v*.md"))

    if verbose and schema_files:
        print(f"🔍 Found {len(schema_files)} schema file(s) in {schema_dir}")

    loader = MarkdownSchemaLoader()

    for schema_file in schema_files:
        # Schemas are registered under their file name.
        schema_name = schema_file.name

        # Skip if already ingested
        if schema_name in existing_schemas:
            results['skipped'].append(schema_name)
            if verbose:
                print(f"⏭️ Skipping {schema_name} (already ingested)")
            continue

        # Try to ingest; one bad file must not abort the remaining ones.
        try:
            # Load schema
            schema_data_full = loader.load_schema(schema_file)
            schema_data = schema_data_full['schema']

            # Store in database
            schema_content = json.dumps(schema_data, indent=2)
            record_id = db_manager.store_schema_file(schema_name, schema_content)

            if record_id:
                results['ingested'].append(schema_name)
                if verbose:
                    title = schema_data.get('title', schema_name)
                    print(f"✅ Ingested {schema_name} (title: {title})")
            else:
                # A falsy record id is treated as a storage failure.
                results['failed'].append((schema_name, "Failed to store in database"))
                if verbose:
                    print(f"❌ Failed to store {schema_name} in database")

        except Exception as e:
            results['failed'].append((schema_name, str(e)))
            if verbose:
                print(f"❌ Failed to ingest {schema_name}: {e}")

    if verbose:
        print("\n📊 Auto-ingestion complete:")
        print(f" Ingested: {len(results['ingested'])}")
        print(f" Skipped: {len(results['skipped'])}")
        print(f" Failed: {len(results['failed'])}")

    return results