feat: implement optimization #8 - schema auto-ingestion
Add automated schema ingestion from markitect/schemas/ directory:

- Create auto_ingest_schemas() function in schema_loader module
- Automatically detect and ingest .md schema files from schemas/
- Skip schemas that are already ingested in database
- Return detailed results with ingested/skipped/failed lists
- Add 'markitect schema-auto-ingest' CLI command
- Support verbose mode for detailed progress reporting
- Useful for post-install setup and development workflows

This eliminates the manual step of running schema-ingest for each bundled schema file, streamlining schema management.

Usage:

    markitect schema-auto-ingest            # Ingest all new schemas
    markitect schema-auto-ingest --verbose  # Show detailed progress

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1771,6 +1771,67 @@ def schema_ingest(config, schema_file, name):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@cli.command('schema-auto-ingest')
@pass_config
def schema_auto_ingest(config):
    """
    Automatically ingest all schemas from markitect/schemas/ directory.

    Scans the schemas directory for .md schema files and ingests any that
    are not already in the database. Skips schemas that have already been
    ingested.

    This command is useful for:
    - Post-install setup to register bundled schemas
    - Development workflow to sync schema changes
    - Updating schema registry after package updates

    Exits with status 1 if any schema fails to ingest or an unexpected
    error occurs, so scripts can detect an incomplete registration.

    Examples:
        markitect schema-auto-ingest
    """
    try:
        # Imported lazily so the command only pays for these modules when run.
        from .schema_loader import auto_ingest_schemas
        from .database import DatabaseManager

        # Initialize database (configured path, else the per-user default)
        db_path = config.get('database_path') or str(Path.home() / '.markitect' / 'markitect.db')
        db_manager = DatabaseManager(db_path)
        db_manager.initialize_database()

        verbose = config.get('verbose', False)

        # Run auto-ingestion
        results = auto_ingest_schemas(db_manager=db_manager, verbose=verbose)
        ingested = results['ingested']
        skipped = results['skipped']
        failed = results['failed']

        # Summary. In verbose mode auto_ingest_schemas has already printed
        # per-file progress and totals, so nothing is repeated here.
        if not verbose:
            if ingested:
                click.echo(f"✅ Ingested {len(ingested)} schema(s)")
                for schema_name in ingested:
                    click.echo(f" - {schema_name}")

            if skipped:
                click.echo(f"⏭️ Skipped {len(skipped)} already-ingested schema(s)")

            if failed:
                click.echo(f"❌ Failed to ingest {len(failed)} schema(s):")
                for schema_name, error in failed:
                    click.echo(f" - {schema_name}: {error}")

            if not ingested and not failed:
                if not skipped:
                    click.echo("ℹ️ No schemas found to ingest")
                else:
                    click.echo("✅ All schemas already ingested")

        # Per-schema failures must be visible to the caller's shell, not only
        # in the printed summary. SystemExit is not an Exception subclass, so
        # the handler below does not intercept it.
        if failed:
            sys.exit(1)

    except Exception as e:
        click.echo(f"Auto-ingest error: {e}", err=True)
        if config and config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
@cli.command('schema-list')
|
||||
@click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'yaml', 'simple']),
|
||||
default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format')
|
||||
|
||||
@@ -501,3 +501,110 @@ markitect validate document.md --schema {Path(frontmatter.get('schema-id', 'sche
|
||||
issues.append("$id should be a full HTTPS URL")
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
def auto_ingest_schemas(db_manager=None, schema_dir: Optional[Path] = None, verbose: bool = False) -> Dict[str, Any]:
    """Automatically ingest schemas from markitect/schemas/ directory.

    This function scans the schemas directory for files matching
    ``*-schema-v*.md`` and ingests any whose file name is not already
    listed by the database. Useful for post-install setup or automatic
    schema registration.

    Args:
        db_manager: DatabaseManager instance (optional; when omitted one is
            created for ``~/.markitect/markitect.db`` and initialized).
        schema_dir: Directory containing schemas (defaults to the
            ``schemas`` directory next to this module). A plain string path
            is accepted as well as a ``Path``.
        verbose: If True, print detailed progress messages.

    Returns:
        Dictionary with ingestion results:
        {
            'ingested': [list of schema names that were ingested],
            'skipped': [list of schema names that were already present],
            'failed': [list of (schema_name, error) tuples for failures]
        }
        If the existing schemas cannot be listed, nothing is ingested and
        ``failed`` holds a single ``('<database>', error)`` entry so the
        caller can tell this apart from "no schemas found".

    Example:
        >>> from markitect.schema_loader import auto_ingest_schemas
        >>> results = auto_ingest_schemas(verbose=True)
        >>> print(f"Ingested {len(results['ingested'])} schemas")
    """
    results: Dict[str, Any] = {
        'ingested': [],
        'skipped': [],
        'failed': [],
    }

    # Determine schema directory; coerce so str / os.PathLike callers work too.
    if schema_dir is None:
        schema_dir = Path(__file__).parent / "schemas"
    else:
        schema_dir = Path(schema_dir)

    # is_dir() rather than exists(): a regular file is not a schema directory.
    if not schema_dir.is_dir():
        if verbose:
            print(f"⚠️ Schema directory not found: {schema_dir}")
        return results

    # Initialize database manager if not provided
    if db_manager is None:
        from .database import DatabaseManager
        db_path = Path.home() / '.markitect' / 'markitect.db'
        db_manager = DatabaseManager(str(db_path))
        db_manager.initialize_database()

    # Get list of already ingested schemas
    try:
        existing_schemas = {schema['name'] for schema in db_manager.list_schemas()}
    except Exception as e:
        if verbose:
            print(f"❌ Error listing existing schemas: {e}")
        # Surface the problem in the result instead of returning an
        # all-empty dict that looks like "nothing to do".
        results['failed'].append(('<database>', f"Error listing existing schemas: {e}"))
        return results

    # Find all schema files; sorted for a deterministic processing order.
    schema_files = sorted(schema_dir.glob("*-schema-v*.md"))

    if verbose and schema_files:
        print(f"🔍 Found {len(schema_files)} schema file(s) in {schema_dir}")

    loader = MarkdownSchemaLoader()

    for schema_file in schema_files:
        # Schemas are keyed by file name in the database.
        schema_name = schema_file.name

        # Skip if already ingested
        if schema_name in existing_schemas:
            results['skipped'].append(schema_name)
            if verbose:
                print(f"⏭️ Skipping {schema_name} (already ingested)")
            continue

        # Try to ingest; one bad file must not stop the remaining files.
        try:
            # Load schema
            schema_data_full = loader.load_schema(schema_file)
            schema_data = schema_data_full['schema']

            # Store in database
            schema_content = json.dumps(schema_data, indent=2)
            record_id = db_manager.store_schema_file(schema_name, schema_content)
        except Exception as e:
            results['failed'].append((schema_name, str(e)))
            if verbose:
                print(f"❌ Failed to ingest {schema_name}: {e}")
            continue

        if record_id:
            results['ingested'].append(schema_name)
            if verbose:
                title = schema_data.get('title', schema_name)
                print(f"✅ Ingested {schema_name} (title: {title})")
        else:
            # A falsy record id is treated as a storage failure.
            results['failed'].append((schema_name, "Failed to store in database"))
            if verbose:
                print(f"❌ Failed to store {schema_name} in database")

    if verbose:
        print("\n📊 Auto-ingestion complete:")
        print(f" Ingested: {len(results['ingested'])}")
        print(f" Skipped: {len(results['skipped'])}")
        print(f" Failed: {len(results['failed'])}")

    return results
|
||||
|
||||
Reference in New Issue
Block a user