diff --git a/markitect/cli.py b/markitect/cli.py index 9bf0e19d..d3d930c9 100644 --- a/markitect/cli.py +++ b/markitect/cli.py @@ -1653,14 +1653,16 @@ def schema_ingest(config, schema_file, name): """ Read and store a JSON schema file in the database. - Implements Issue #3 functionality to ingest external schema files - and store them for later use with validation and other operations. + Validates schemas against the MarkiTect metaschema to ensure compatibility + with MarkiTect features like heading text capture and content instructions. + Implements Issue #3 and Issue #50 functionality. SCHEMA_FILE: Path to the JSON schema file to store Examples: markitect schema-ingest my_schema.json markitect schema-ingest external_schema.json --name custom-name + markitect schema-ingest markitect_schema.json -v # Show metaschema validation """ try: # Determine schema name @@ -1677,6 +1679,25 @@ def schema_ingest(config, schema_file, name): click.echo(f"Error: Invalid JSON in schema file - {e}", err=True) sys.exit(1) + # Validate against MarkiTect metaschema + from .metaschema import MetaschemaValidator + try: + metaschema_validator = MetaschemaValidator() + validation_result = metaschema_validator.validate_schema_with_errors(schema_data) + + if not validation_result.is_valid: + click.echo("⚠️ Schema validation warnings against MarkiTect metaschema:", err=True) + for error in validation_result.errors: + click.echo(f" - {error.message}", err=True) + click.echo(" Schema will be stored but may not be fully compatible with MarkiTect features.", err=True) + else: + if config.get('verbose'): + click.echo("✅ Schema validates successfully against MarkiTect metaschema") + + except Exception as e: + if config.get('verbose'): + click.echo(f"⚠️ Could not validate against metaschema: {e}", err=True) + # Initialize database and store schema from .database import DatabaseManager db_path = config.get('database', 'markitect.db') diff --git a/markitect/metaschema.py b/markitect/metaschema.py new 
"""
MarkiTect Metaschema Module for Issue #50

This module provides metaschema validation for MarkiTect JSON schemas,
extending standard JSON Schema with MarkiTect-specific features.

This is a TDD8 implementation - tests are written first, implementation follows.
"""

import json
from pathlib import Path
from typing import Any, Dict, List, Optional

# Path to the MarkiTect metaschema JSON file
MARKITECT_METASCHEMA_PATH = Path(__file__).parent / "schemas" / "markitect-metaschema.json"


class ValidationError:
    """Represents a single schema validation error.

    Attributes:
        message: Human-readable description of the problem.
        path: Name of the offending schema property ("" when not applicable).
    """

    def __init__(self, message: str, path: str = ""):
        self.message = message
        self.path = path

    def __repr__(self) -> str:
        return f"{type(self).__name__}(message={self.message!r}, path={self.path!r})"


class ValidationResult:
    """Result of schema validation against the metaschema.

    Attributes:
        is_valid: True when no errors were found.
        errors: The collected ValidationError objects (empty when valid).
    """

    def __init__(self, is_valid: bool, errors: Optional[List[ValidationError]] = None):
        self.is_valid = is_valid
        # A None default avoids sharing one mutable list between instances.
        self.errors = errors or []

    def __repr__(self) -> str:
        return f"{type(self).__name__}(is_valid={self.is_valid!r}, errors={self.errors!r})"


class MetaschemaValidator:
    """Validates MarkiTect schemas against the MarkiTect metaschema.

    The checks performed by ``validate_schema_with_errors`` are hand-written
    rules in this class; they do not consult the JSON document returned by
    ``get_metaschema``.
    """

    def __init__(self):
        """Initialize the metaschema validator with an empty metaschema cache."""
        self._metaschema_cache = None

    def get_metaschema(self) -> Dict[str, Any]:
        """
        Get the MarkiTect metaschema, loading it from disk on first use.

        The parsed document is cached on the instance, so the file is read
        at most once per validator.

        Returns:
            Dictionary containing the metaschema

        Raises:
            FileNotFoundError: If metaschema file doesn't exist
            json.JSONDecodeError: If metaschema file is invalid JSON
        """
        if self._metaschema_cache is None:
            try:
                # JSON is UTF-8 by specification; do not depend on the
                # platform's default text encoding.
                with open(MARKITECT_METASCHEMA_PATH, encoding="utf-8") as f:
                    self._metaschema_cache = json.load(f)
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    f"Metaschema file not found: {MARKITECT_METASCHEMA_PATH}"
                ) from err

        return self._metaschema_cache

    def validate_schema(self, schema: Dict[str, Any]) -> bool:
        """
        Validate a schema against the MarkiTect metaschema.

        Args:
            schema: The schema to validate

        Returns:
            True if valid, False otherwise
        """
        return self.validate_schema_with_errors(schema).is_valid

    def validate_schema_with_errors(self, schema: Dict[str, Any]) -> ValidationResult:
        """
        Validate a schema and return detailed error information.

        Args:
            schema: The schema to validate

        Returns:
            ValidationResult with validity status and error details
        """
        # Nothing else can be checked on a non-object, so report and stop.
        if not isinstance(schema, dict):
            return ValidationResult(False, [ValidationError("Schema must be an object")])

        errors: List[ValidationError] = []

        # Required top-level JSON Schema properties
        if "$schema" not in schema:
            errors.append(ValidationError("Missing required $schema property"))

        if "type" not in schema:
            errors.append(ValidationError("Missing required type property"))

        # MarkiTect extensions (top level and nested properties)
        errors.extend(self._validate_markitect_extensions(schema))

        return ValidationResult(not errors, errors)

    def _validate_markitect_extensions(self, schema: Dict[str, Any]) -> List[ValidationError]:
        """Validate MarkiTect-specific extensions in the schema.

        Checks every known ``x-markitect-*`` key present in ``schema`` and then
        recurses into each object-valued entry of ``properties``. Other
        subschema keywords (``items``, ``definitions``, ...) are not traversed.

        Args:
            schema: A schema object (must be a dict).

        Returns:
            List of errors found; empty when everything is valid.
        """
        errors: List[ValidationError] = []

        # Extension key -> validator method
        validation_rules = {
            "x-markitect-outline-depth": self._validate_outline_depth,
            "x-markitect-outline-mode": self._validate_outline_mode,
            "x-markitect-heading-text": self._validate_heading_text,
            "x-markitect-content-instructions": self._validate_content_instructions,
            "x-markitect-instruction-type": self._validate_instruction_type,
            "x-markitect-generation-mode": self._validate_generation_mode,
            "x-markitect-generated-from": self._validate_generated_from,
        }

        for property_name, validator in validation_rules.items():
            if property_name in schema:
                error = validator(schema[property_name], property_name)
                if error:
                    errors.append(error)

        # Recursively validate nested properties
        if "properties" in schema:
            properties = schema["properties"]
            if isinstance(properties, dict):
                for prop_schema in properties.values():
                    if isinstance(prop_schema, dict):
                        errors.extend(self._validate_markitect_extensions(prop_schema))
            else:
                # A malformed "properties" (list, null, ...) is reported as a
                # validation error instead of raising AttributeError.
                errors.append(ValidationError("properties must be an object", "properties"))

        return errors

    def _validate_outline_depth(self, value: Any, property_name: str) -> Optional[ValidationError]:
        """Validate x-markitect-outline-depth property (integer >= 1)."""
        # bool is a subclass of int in Python, so True/False must be rejected
        # explicitly; JSON booleans are not integers.
        if isinstance(value, bool) or not isinstance(value, int) or value < 1:
            return ValidationError(
                "x-markitect-outline-depth must be an integer >= 1",
                property_name
            )
        return None

    def _validate_outline_mode(self, value: Any, property_name: str) -> Optional[ValidationError]:
        """Validate x-markitect-outline-mode property (boolean)."""
        if not isinstance(value, bool):
            return ValidationError(
                "x-markitect-outline-mode must be a boolean",
                property_name
            )
        return None

    def _validate_heading_text(self, value: Any, property_name: str) -> Optional[ValidationError]:
        """Validate x-markitect-heading-text property (string)."""
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-heading-text must be a string",
                property_name
            )
        return None

    def _validate_content_instructions(self, value: Any, property_name: str) -> Optional[ValidationError]:
        """Validate x-markitect-content-instructions property (string)."""
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-content-instructions must be a string",
                property_name
            )
        return None

    def _validate_instruction_type(self, value: Any, property_name: str) -> Optional[ValidationError]:
        """Validate x-markitect-instruction-type property (one of a fixed set)."""
        valid_types = ["description", "example", "constraint", "template"]
        if not isinstance(value, str) or value not in valid_types:
            return ValidationError(
                f"x-markitect-instruction-type must be one of {valid_types}",
                property_name
            )
        return None

    def _validate_generation_mode(self, value: Any, property_name: str) -> Optional[ValidationError]:
        """Validate x-markitect-generation-mode property ("outline" or "full")."""
        valid_modes = ["outline", "full"]
        if not isinstance(value, str) or value not in valid_modes:
            return ValidationError(
                f"x-markitect-generation-mode must be one of {valid_modes}",
                property_name
            )
        return None

    def _validate_generated_from(self, value: Any, property_name: str) -> Optional[ValidationError]:
        """Validate x-markitect-generated-from property (string)."""
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-generated-from must be a string",
                property_name
            )
        return None
"example", "constraint", "template"], + "description": "Type of content instruction provided" + }, + "x-markitect-generated-from": { + "type": "string", + "description": "Source file or document this schema was generated from" + }, + "x-markitect-generation-mode": { + "type": "string", + "enum": ["outline", "full"], + "description": "Mode used to generate this schema" + } + }, + "patternProperties": { + "^x-markitect-": { + "description": "MarkiTect extension properties" + } + } + } + ] +} \ No newline at end of file