""" MarkiTect Metaschema Module for Issue #50 This module provides metaschema validation for MarkiTect JSON schemas, extending standard JSON Schema with MarkiTect-specific features. This is a TDD8 implementation - tests are written first, implementation follows. """ from pathlib import Path from typing import Dict, Any, List, Optional import json # Path to the MarkiTect metaschema JSON file MARKITECT_METASCHEMA_PATH = Path(__file__).parent / "schemas" / "markitect-metaschema.json" class ValidationError: """Represents a schema validation error.""" def __init__(self, message: str, path: str = ""): self.message = message self.path = path class ValidationResult: """Result of schema validation against metaschema.""" def __init__(self, is_valid: bool, errors: List[ValidationError] = None): self.is_valid = is_valid self.errors = errors or [] class MetaschemaValidator: """Validates MarkiTect schemas against the MarkiTect metaschema.""" def __init__(self): """Initialize the metaschema validator.""" self._metaschema_cache = None def get_metaschema(self) -> Dict[str, Any]: """ Get the MarkiTect metaschema. Returns: Dictionary containing the metaschema Raises: FileNotFoundError: If metaschema file doesn't exist json.JSONDecodeError: If metaschema file is invalid JSON """ if self._metaschema_cache is None: if not MARKITECT_METASCHEMA_PATH.exists(): raise FileNotFoundError(f"Metaschema file not found: {MARKITECT_METASCHEMA_PATH}") with open(MARKITECT_METASCHEMA_PATH) as f: self._metaschema_cache = json.load(f) return self._metaschema_cache def validate_schema(self, schema: Dict[str, Any]) -> bool: """ Validate a schema against the MarkiTect metaschema. Args: schema: The schema to validate Returns: True if valid, False otherwise """ result = self.validate_schema_with_errors(schema) return result.is_valid def validate_schema_with_errors(self, schema: Dict[str, Any]) -> ValidationResult: """ Validate a schema and return detailed error information. Args: schema: The schema to validate Returns: ValidationResult with validity status and error details """ errors = [] # Basic JSON Schema validation - check required properties if not isinstance(schema, dict): return ValidationResult(False, [ValidationError("Schema must be an object")]) # Check for required JSON Schema properties if "$schema" not in schema: errors.append(ValidationError("Missing required $schema property")) if "type" not in schema: errors.append(ValidationError("Missing required type property")) # Validate MarkiTect extensions errors.extend(self._validate_markitect_extensions(schema)) return ValidationResult(len(errors) == 0, errors) def _validate_markitect_extensions(self, schema: Dict[str, Any]) -> List[ValidationError]: """Validate MarkiTect-specific extensions in the schema.""" errors = [] # Define validation rules for MarkiTect extensions validation_rules = { "x-markitect-outline-depth": self._validate_outline_depth, "x-markitect-outline-mode": self._validate_outline_mode, "x-markitect-heading-text": self._validate_heading_text, "x-markitect-content-instructions": self._validate_content_instructions, "x-markitect-instruction-type": self._validate_instruction_type, "x-markitect-generation-mode": self._validate_generation_mode, "x-markitect-generated-from": self._validate_generated_from, "x-markitect-sections": self._validate_sections, "x-markitect-content-control": self._validate_content_control, } # Apply validation rules for property_name, validator in validation_rules.items(): if property_name in schema: error = validator(schema[property_name], property_name) if error: errors.append(error) # Recursively validate nested properties if "properties" in schema: for prop_name, prop_schema in schema["properties"].items(): if isinstance(prop_schema, dict): nested_errors = self._validate_markitect_extensions(prop_schema) errors.extend(nested_errors) return errors def _validate_outline_depth(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-outline-depth property.""" if not isinstance(value, int) or value < 1: return ValidationError( "x-markitect-outline-depth must be an integer >= 1", property_name ) return None def _validate_outline_mode(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-outline-mode property.""" if not isinstance(value, bool): return ValidationError( "x-markitect-outline-mode must be a boolean", property_name ) return None def _validate_heading_text(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-heading-text property.""" if not isinstance(value, str): return ValidationError( "x-markitect-heading-text must be a string", property_name ) return None def _validate_content_instructions(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-content-instructions property.""" if not isinstance(value, str): return ValidationError( "x-markitect-content-instructions must be a string", property_name ) return None def _validate_instruction_type(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-instruction-type property.""" valid_types = ["description", "example", "constraint", "template"] if not isinstance(value, str) or value not in valid_types: return ValidationError( f"x-markitect-instruction-type must be one of {valid_types}", property_name ) return None def _validate_generation_mode(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-generation-mode property.""" valid_modes = ["outline", "full"] if not isinstance(value, str) or value not in valid_modes: return ValidationError( f"x-markitect-generation-mode must be one of {valid_modes}", property_name ) return None def _validate_generated_from(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-generated-from property.""" if not isinstance(value, str): return ValidationError( "x-markitect-generated-from must be a string", property_name ) return None def _validate_sections(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-sections property.""" if not isinstance(value, dict): return ValidationError( "x-markitect-sections must be an object", property_name ) # Validate each section definition for section_name, section_def in value.items(): # Section name should be UPPERCASE (convention) if not isinstance(section_name, str): return ValidationError( f"Section name must be a string: {section_name}", f"{property_name}.{section_name}" ) if not isinstance(section_def, dict): return ValidationError( f"Section definition must be an object: {section_name}", f"{property_name}.{section_name}" ) # Validate required 'classification' field if "classification" not in section_def: return ValidationError( f"Section '{section_name}' missing required 'classification' field", f"{property_name}.{section_name}" ) classification = section_def["classification"] valid_classifications = ["required", "recommended", "optional", "discouraged", "improper"] if classification not in valid_classifications: return ValidationError( f"Section '{section_name}' has invalid classification '{classification}'. " f"Must be one of {valid_classifications}", f"{property_name}.{section_name}.classification" ) # Validate optional fields if present if "heading_level" in section_def: level = section_def["heading_level"] if not isinstance(level, int) or level < 1 or level > 6: return ValidationError( f"Section '{section_name}' heading_level must be integer 1-6, got {level}", f"{property_name}.{section_name}.heading_level" ) if "position" in section_def: position = section_def["position"] valid_positions = ["after_title", "before_section_name", "after_section_name", "anywhere"] if position not in valid_positions: return ValidationError( f"Section '{section_name}' has invalid position '{position}'. " f"Must be one of {valid_positions}", f"{property_name}.{section_name}.position" ) # Validate content constraints are non-negative integers for constraint in ["min_paragraphs", "max_paragraphs", "min_code_blocks", "max_code_blocks", "min_lists", "max_lists"]: if constraint in section_def: value_check = section_def[constraint] if not isinstance(value_check, int) or value_check < 0: return ValidationError( f"Section '{section_name}' {constraint} must be non-negative integer, got {value_check}", f"{property_name}.{section_name}.{constraint}" ) # Validate alternatives is array of strings if "alternatives" in section_def: alternatives = section_def["alternatives"] if not isinstance(alternatives, list): return ValidationError( f"Section '{section_name}' alternatives must be an array", f"{property_name}.{section_name}.alternatives" ) for alt in alternatives: if not isinstance(alt, str): return ValidationError( f"Section '{section_name}' alternative names must be strings", f"{property_name}.{section_name}.alternatives" ) return None def _validate_content_control(self, value: Any, property_name: str) -> Optional[ValidationError]: """Validate x-markitect-content-control property.""" if not isinstance(value, dict): return ValidationError( "x-markitect-content-control must be an object", property_name ) # Validate each section's content control rules for section_name, control_def in value.items(): if not isinstance(section_name, str): return ValidationError( f"Content control section name must be a string: {section_name}", f"{property_name}.{section_name}" ) if not isinstance(control_def, dict): return ValidationError( f"Content control definition must be an object: {section_name}", f"{property_name}.{section_name}" ) # Validate pattern arrays for pattern_type in ["required_patterns", "discouraged_patterns", "forbidden_patterns"]: if pattern_type in control_def: patterns = control_def[pattern_type] if not isinstance(patterns, list): return ValidationError( f"Content control '{section_name}' {pattern_type} must be an array", f"{property_name}.{section_name}.{pattern_type}" ) for pattern in patterns: if not isinstance(pattern, str): return ValidationError( f"Content control '{section_name}' pattern must be string", f"{property_name}.{section_name}.{pattern_type}" ) # Validate content_quality object if "content_quality" in control_def: quality = control_def["content_quality"] if not isinstance(quality, dict): return ValidationError( f"Content control '{section_name}' content_quality must be an object", f"{property_name}.{section_name}.content_quality" ) # Validate word/sentence counts for count_field in ["min_words", "max_words", "min_sentences", "max_sentences"]: if count_field in quality: count = quality[count_field] if not isinstance(count, int) or count < 0: return ValidationError( f"Content quality '{section_name}' {count_field} must be non-negative integer", f"{property_name}.{section_name}.content_quality.{count_field}" ) # Validate readability_target if "readability_target" in quality: target = quality["readability_target"] valid_targets = ["simple", "general", "technical", "advanced"] if target not in valid_targets: return ValidationError( f"Content quality '{section_name}' readability_target must be one of {valid_targets}", f"{property_name}.{section_name}.content_quality.readability_target" ) # Validate content_instructions array if "content_instructions" in control_def: instructions = control_def["content_instructions"] if not isinstance(instructions, list): return ValidationError( f"Content control '{section_name}' content_instructions must be an array", f"{property_name}.{section_name}.content_instructions" ) for instruction in instructions: if not isinstance(instruction, str): return ValidationError( f"Content control '{section_name}' instruction must be string", f"{property_name}.{section_name}.content_instructions" ) # Validate link_validation object if "link_validation" in control_def: link_val = control_def["link_validation"] if not isinstance(link_val, dict): return ValidationError( f"Content control '{section_name}' link_validation must be an object", f"{property_name}.{section_name}.link_validation" ) for field in ["check_internal", "check_external", "allow_fragments"]: if field in link_val: if not isinstance(link_val[field], bool): return ValidationError( f"Content control '{section_name}' link_validation.{field} must be boolean", f"{property_name}.{section_name}.link_validation.{field}" ) return None