feat: implement optimization #8 - schema auto-ingestion

Add automated schema ingestion from markitect/schemas/ directory:

- Create auto_ingest_schemas() function in schema_loader module
- Automatically detect and ingest .md schema files from schemas/
- Skip schemas that are already ingested in the database
- Return detailed results with ingested/skipped/failed lists
- Add 'markitect schema-auto-ingest' CLI command
- Support verbose mode for detailed progress reporting
- Useful for post-install setup and development workflows

This eliminates the manual step of running schema-ingest for each
bundled schema file, streamlining schema management.

Usage:
  markitect schema-auto-ingest           # Ingest all new schemas
  markitect schema-auto-ingest --verbose # Show detailed progress

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-06 21:34:46 +01:00
parent 7f696582a9
commit 7515b9c0e5
2 changed files with 168 additions and 0 deletions

View File

@@ -1771,6 +1771,67 @@ def schema_ingest(config, schema_file, name):
sys.exit(1)
@cli.command('schema-auto-ingest')
@pass_config
def schema_auto_ingest(config):
    """
    Automatically ingest all schemas from markitect/schemas/ directory.

    Scans the schemas directory for .md schema files and ingests any that
    are not already in the database. Skips schemas that have already been
    ingested.

    Exits with status 1 if any schema fails to ingest or if an unexpected
    error occurs, so scripted setups can detect an incomplete ingestion.

    This command is useful for:
    - Post-install setup to register bundled schemas
    - Development workflow to sync schema changes
    - Updating schema registry after package updates

    Examples:
        markitect schema-auto-ingest
    """
    try:
        from .schema_loader import auto_ingest_schemas
        from .database import DatabaseManager

        # Initialize database (fall back to the per-user default location
        # when the config does not name one).
        db_path = config.get('database_path') or str(Path.home() / '.markitect' / 'markitect.db')
        db_manager = DatabaseManager(db_path)
        db_manager.initialize_database()

        verbose = config.get('verbose', False)

        # Run auto-ingestion
        results = auto_ingest_schemas(db_manager=db_manager, verbose=verbose)

        ingested = results['ingested']
        skipped = results['skipped']
        failed = results['failed']

        # Summary: in verbose mode auto_ingest_schemas() has already printed
        # per-file progress and totals, so only summarise when it was quiet.
        if not verbose:
            if ingested:
                click.echo(f"✅ Ingested {len(ingested)} schema(s)")
                for schema_name in ingested:
                    click.echo(f" - {schema_name}")
            if skipped:
                click.echo(f"⏭️ Skipped {len(skipped)} already-ingested schema(s)")
            if failed:
                click.echo(f"❌ Failed to ingest {len(failed)} schema(s):")
                for schema_name, error in failed:
                    click.echo(f" - {schema_name}: {error}")
            if not ingested and not failed:
                if not skipped:
                    click.echo(" No schemas found to ingest")
                else:
                    click.echo("✅ All schemas already ingested")

        # Report partial failure through the exit status. SystemExit is not a
        # subclass of Exception, so the handler below does not intercept it.
        if failed:
            sys.exit(1)
    except Exception as e:
        click.echo(f"Auto-ingest error: {e}", err=True)
        if config and config.get('verbose'):
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@cli.command('schema-list')
@click.option('--format', 'output_format', type=click.Choice(['table', 'json', 'yaml', 'simple']),
default=lambda: get_default_format(['table', 'json', 'yaml', 'simple']), help='Output format')

View File

@@ -501,3 +501,110 @@ markitect validate document.md --schema {Path(frontmatter.get('schema-id', 'sche
issues.append("$id should be a full HTTPS URL")
return issues
def auto_ingest_schemas(db_manager=None, schema_dir: Optional[Path] = None, verbose: bool = False) -> Dict[str, Any]:
    """Automatically ingest schemas from markitect/schemas/ directory.

    This function scans the schemas directory for .md schema files (those
    matching ``*-schema-v*.md``) and ingests any whose file name is not
    already listed by ``db_manager.list_schemas()``. Useful for post-install
    setup or automatic schema registration.

    Args:
        db_manager: DatabaseManager instance (optional, will create one at
            ``~/.markitect/markitect.db`` if not provided)
        schema_dir: Directory containing schemas, as a ``Path`` or a string
            (defaults to the ``schemas`` directory next to this module)
        verbose: If True, print detailed progress messages

    Returns:
        Dictionary with ingestion results:
        {
            'ingested': [list of schema names that were ingested],
            'skipped': [list of schema names that were already present],
            'failed': [list of (schema_name, error) tuples for failures]
        }
        If the existing schemas cannot be listed, nothing is ingested and
        'failed' holds a single ('<database>', error) entry, so callers can
        tell this apart from "no schemas found".

    Example:
        >>> from markitect.schema_loader import auto_ingest_schemas
        >>> results = auto_ingest_schemas(verbose=True)
        >>> print(f"Ingested {len(results['ingested'])} schemas")
    """
    results: Dict[str, Any] = {
        'ingested': [],
        'skipped': [],
        'failed': []
    }

    # Determine schema directory (accept plain strings as well as Path objects)
    if schema_dir is None:
        schema_dir = Path(__file__).parent / "schemas"
    else:
        schema_dir = Path(schema_dir)

    if not schema_dir.exists():
        if verbose:
            print(f"⚠️ Schema directory not found: {schema_dir}")
        return results

    # Initialize database manager if not provided
    if db_manager is None:
        from .database import DatabaseManager
        db_path = Path.home() / '.markitect' / 'markitect.db'
        db_manager = DatabaseManager(str(db_path))
        db_manager.initialize_database()

    # Get list of already ingested schemas
    try:
        existing_schemas = {schema['name'] for schema in db_manager.list_schemas()}
    except Exception as e:
        if verbose:
            print(f"❌ Error listing existing schemas: {e}")
        # Surface the problem to the caller instead of returning an
        # all-empty result that looks like "nothing to do".
        results['failed'].append(("<database>", f"Error listing existing schemas: {e}"))
        return results

    # Find all schema files
    schema_files = list(schema_dir.glob("*-schema-v*.md"))

    if verbose and schema_files:
        print(f"🔍 Found {len(schema_files)} schema file(s) in {schema_dir}")

    loader = MarkdownSchemaLoader()

    # Sorted so that processing order (and the result lists) is deterministic.
    for schema_file in sorted(schema_files):
        schema_name = schema_file.name

        # Skip if already ingested
        if schema_name in existing_schemas:
            results['skipped'].append(schema_name)
            if verbose:
                print(f"⏭️ Skipping {schema_name} (already ingested)")
            continue

        # Try to ingest; one bad file must not stop the remaining ones.
        try:
            # Load schema
            schema_data_full = loader.load_schema(schema_file)
            schema_data = schema_data_full['schema']

            # Store in database as pretty-printed JSON
            schema_content = json.dumps(schema_data, indent=2)
            record_id = db_manager.store_schema_file(schema_name, schema_content)

            if record_id:
                results['ingested'].append(schema_name)
                if verbose:
                    title = schema_data.get('title', schema_name)
                    print(f"✅ Ingested {schema_name} (title: {title})")
            else:
                # A falsy record id is treated as a storage failure.
                results['failed'].append((schema_name, "Failed to store in database"))
                if verbose:
                    print(f"❌ Failed to store {schema_name} in database")
        except Exception as e:
            results['failed'].append((schema_name, str(e)))
            if verbose:
                print(f"❌ Failed to ingest {schema_name}: {e}")

    if verbose:
        print("\n📊 Auto-ingestion complete:")
        print(f" Ingested: {len(results['ingested'])}")
        print(f" Skipped: {len(results['skipped'])}")
        print(f" Failed: {len(results['failed'])}")

    return results