Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Complete Phase 1 of Schema Evolution Workplan implementing flexible content control and section classification system. ## New Features ### 1. x-markitect-sections Extension - Five classification levels: required, recommended, optional, discouraged, improper - Per-section content constraints (paragraphs, code blocks, lists) - Position hints for section ordering - Custom error/warning messages - Alternative section names support - Content instructions for authors ### 2. x-markitect-content-control Extension - Required/discouraged/forbidden pattern matching - Content quality metrics (word count, readability target, sentence count) - Content instruction arrays - Link validation configuration ### 3. Metaschema Validation - Updated markitect-metaschema.json with complete validation rules - Enhanced metaschema.py with validation methods for both extensions - Comprehensive validation of all extension properties - Clear error messages for invalid schemas ### 4. Documentation & Examples - Complete specification in docs/specifications/schema-extensions-spec.md - Enhanced manpage schema demonstrating all 5 classification levels - API documentation schema showing alternative patterns - Detailed usage examples and validation behavior ## Implementation Details **Files Modified:** - markitect/schemas/markitect-metaschema.json: Added extension definitions - markitect/metaschema.py: Added _validate_sections() and _validate_content_control() **Files Created:** - docs/specifications/schema-extensions-spec.md: Complete specification (v1.0) - examples/manpages/enhanced-manpage-schema.json: Demonstrates all classifications - examples/manpages/api-documentation-schema.json: Shows API doc patterns ## Validation Behavior **Classification Levels:** - required: Missing = ERROR (validation fails) - recommended: Missing = WARNING (validation succeeds with warnings) - optional: No validation impact - discouraged: Present = WARNING (validation succeeds with warnings) - improper: Present = ERROR 
(validation fails) ## Next Steps Phase 2: Schema Refinement Tools (schema-analyze, schema-refine, schema-compose) Phase 3: Enhanced Validation Engine (classification-aware validation, quality metrics) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
384 lines
17 KiB
Python
384 lines
17 KiB
Python
"""
|
|
MarkiTect Metaschema Module for Issue #50
|
|
|
|
This module provides metaschema validation for MarkiTect JSON schemas,
|
|
extending standard JSON Schema with MarkiTect-specific features.
|
|
|
|
This is a TDD8 implementation - tests are written first, implementation follows.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List, Optional
|
|
import json
|
|
|
|
# Location of the MarkiTect metaschema JSON file: the "schemas" directory
# that sits next to this module.
MARKITECT_METASCHEMA_PATH = Path(__file__).parent.joinpath(
    "schemas", "markitect-metaschema.json"
)
|
|
|
|
|
|
class ValidationError:
    """Represents a schema validation error.

    Attributes:
        message: Human-readable description of the problem.
        path: Location of the offending property (for example
            ``"x-markitect-sections.NAME.classification"``); defaults to
            an empty string when no location is given.
    """

    def __init__(self, message: str, path: str = ""):
        self.message = message
        self.path = path

    def __repr__(self) -> str:
        # Without this, errors show up in logs and assertion failures as an
        # opaque "<ValidationError object at 0x...>".
        return (
            f"{type(self).__name__}"
            f"(message={self.message!r}, path={self.path!r})"
        )
|
|
|
|
|
|
class ValidationResult:
    """Result of schema validation against metaschema.

    Attributes:
        is_valid: Whether validation succeeded.
        errors: The validation errors that were found; an empty list when
            none were supplied.
    """

    def __init__(
        self,
        is_valid: bool,
        errors: Optional[List["ValidationError"]] = None,
    ):
        self.is_valid = is_valid
        # ``None`` (or any empty sequence) becomes a fresh list so instances
        # never share a mutable default.
        self.errors = errors or []

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}"
            f"(is_valid={self.is_valid!r}, errors={self.errors!r})"
        )
|
|
|
|
|
|
class MetaschemaValidator:
    """Validates MarkiTect schemas against the MarkiTect metaschema.

    The checks are implemented in Python: a schema must be a dict carrying
    ``$schema`` and ``type``, and every recognised ``x-markitect-*``
    extension (at the top level and, recursively, under ``properties``) must
    have a well-formed value. Each extension validator reports only the
    first problem it finds.

    NOTE: ``validate_schema*`` do not consult the JSON document returned by
    ``get_metaschema()``; that file is only loaded and cached on request.
    """

    # Extension property -> name of the method validating its value. Names
    # are resolved with getattr so subclass overrides are honoured.
    _EXTENSION_VALIDATORS = {
        "x-markitect-outline-depth": "_validate_outline_depth",
        "x-markitect-outline-mode": "_validate_outline_mode",
        "x-markitect-heading-text": "_validate_heading_text",
        "x-markitect-content-instructions": "_validate_content_instructions",
        "x-markitect-instruction-type": "_validate_instruction_type",
        "x-markitect-generation-mode": "_validate_generation_mode",
        "x-markitect-generated-from": "_validate_generated_from",
        "x-markitect-sections": "_validate_sections",
        "x-markitect-content-control": "_validate_content_control",
    }

    # Allowed enum values. Kept as lists because their repr is embedded
    # verbatim in the error messages.
    _INSTRUCTION_TYPES = ["description", "example", "constraint", "template"]
    _GENERATION_MODES = ["outline", "full"]
    _CLASSIFICATIONS = [
        "required", "recommended", "optional", "discouraged", "improper",
    ]
    _POSITIONS = [
        "after_title", "before_section_name", "after_section_name", "anywhere",
    ]
    _READABILITY_TARGETS = ["simple", "general", "technical", "advanced"]

    _SECTION_COUNT_CONSTRAINTS = (
        "min_paragraphs", "max_paragraphs", "min_code_blocks",
        "max_code_blocks", "min_lists", "max_lists",
    )
    _PATTERN_FIELDS = (
        "required_patterns", "discouraged_patterns", "forbidden_patterns",
    )
    _QUALITY_COUNT_FIELDS = (
        "min_words", "max_words", "min_sentences", "max_sentences",
    )
    _LINK_FLAGS = ("check_internal", "check_external", "allow_fragments")

    def __init__(self):
        """Initialize the metaschema validator with an empty cache."""
        self._metaschema_cache = None

    @staticmethod
    def _is_int(value: Any) -> bool:
        """Return True for genuine integers only.

        ``bool`` is a subclass of ``int`` in Python, so a plain
        ``isinstance(value, int)`` would accept JSON ``true``/``false``
        wherever an integer is required.
        """
        return isinstance(value, int) and not isinstance(value, bool)

    def get_metaschema(self) -> Dict[str, Any]:
        """
        Get the MarkiTect metaschema, loading it from disk on first use.

        Returns:
            Dictionary containing the metaschema

        Raises:
            FileNotFoundError: If metaschema file doesn't exist
            json.JSONDecodeError: If metaschema file is invalid JSON
        """
        if self._metaschema_cache is None:
            if not MARKITECT_METASCHEMA_PATH.exists():
                raise FileNotFoundError(
                    f"Metaschema file not found: {MARKITECT_METASCHEMA_PATH}"
                )

            # Read as UTF-8 explicitly rather than relying on the platform's
            # default text encoding.
            with open(MARKITECT_METASCHEMA_PATH, encoding="utf-8") as f:
                self._metaschema_cache = json.load(f)

        return self._metaschema_cache

    def validate_schema(self, schema: Dict[str, Any]) -> bool:
        """
        Validate a schema against the MarkiTect metaschema.

        Args:
            schema: The schema to validate

        Returns:
            True if valid, False otherwise
        """
        return self.validate_schema_with_errors(schema).is_valid

    def validate_schema_with_errors(
        self, schema: Dict[str, Any]
    ) -> "ValidationResult":
        """
        Validate a schema and return detailed error information.

        Args:
            schema: The schema to validate

        Returns:
            ValidationResult with validity status and error details
        """
        # Nothing else can be checked on a non-object schema.
        if not isinstance(schema, dict):
            return ValidationResult(
                False, [ValidationError("Schema must be an object")]
            )

        errors = []

        # Required JSON Schema properties.
        if "$schema" not in schema:
            errors.append(ValidationError("Missing required $schema property"))
        if "type" not in schema:
            errors.append(ValidationError("Missing required type property"))

        # MarkiTect extensions (top level and nested properties).
        errors.extend(self._validate_markitect_extensions(schema))

        return ValidationResult(not errors, errors)

    def _validate_markitect_extensions(
        self, schema: Dict[str, Any]
    ) -> List["ValidationError"]:
        """Validate MarkiTect-specific extensions in the schema.

        Runs the matching validator for every known extension present in
        ``schema`` and then recurses into each dict-valued entry of
        ``properties``. Returns all errors collected (possibly empty).
        """
        errors = []

        for property_name, method_name in self._EXTENSION_VALIDATORS.items():
            if property_name in schema:
                validator = getattr(self, method_name)
                error = validator(schema[property_name], property_name)
                if error is not None:
                    errors.append(error)

        # Recursively validate nested properties.
        if "properties" in schema:
            properties = schema["properties"]
            if isinstance(properties, dict):
                for prop_schema in properties.values():
                    if isinstance(prop_schema, dict):
                        errors.extend(
                            self._validate_markitect_extensions(prop_schema)
                        )
            else:
                # Report a malformed "properties" instead of crashing on
                # ``.items()`` with an AttributeError.
                errors.append(
                    ValidationError("properties must be an object", "properties")
                )

        return errors

    def _validate_outline_depth(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-outline-depth: an integer >= 1 (not a bool)."""
        if not self._is_int(value) or value < 1:
            return ValidationError(
                "x-markitect-outline-depth must be an integer >= 1",
                property_name,
            )
        return None

    def _validate_outline_mode(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-outline-mode: must be a boolean."""
        if not isinstance(value, bool):
            return ValidationError(
                "x-markitect-outline-mode must be a boolean",
                property_name,
            )
        return None

    def _validate_heading_text(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-heading-text: must be a string."""
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-heading-text must be a string",
                property_name,
            )
        return None

    def _validate_content_instructions(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-content-instructions: must be a string."""
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-content-instructions must be a string",
                property_name,
            )
        return None

    def _validate_instruction_type(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-instruction-type against the allowed types."""
        valid_types = self._INSTRUCTION_TYPES
        if not isinstance(value, str) or value not in valid_types:
            return ValidationError(
                f"x-markitect-instruction-type must be one of {valid_types}",
                property_name,
            )
        return None

    def _validate_generation_mode(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-generation-mode against the allowed modes."""
        valid_modes = self._GENERATION_MODES
        if not isinstance(value, str) or value not in valid_modes:
            return ValidationError(
                f"x-markitect-generation-mode must be one of {valid_modes}",
                property_name,
            )
        return None

    def _validate_generated_from(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-generated-from: must be a string."""
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-generated-from must be a string",
                property_name,
            )
        return None

    def _validate_sections(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-sections property.

        The value must be an object mapping section names to section
        definitions. Returns the first error found, or None when every
        section definition is well-formed.
        """
        if not isinstance(value, dict):
            return ValidationError(
                "x-markitect-sections must be an object",
                property_name,
            )

        for section_name, section_def in value.items():
            error = self._validate_section_definition(
                section_name, section_def, property_name
            )
            if error is not None:
                return error

        return None

    def _validate_section_definition(
        self, section_name: Any, section_def: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Check one entry of x-markitect-sections; return its first error."""
        section_path = f"{property_name}.{section_name}"

        if not isinstance(section_name, str):
            return ValidationError(
                f"Section name must be a string: {section_name}",
                section_path,
            )

        if not isinstance(section_def, dict):
            return ValidationError(
                f"Section definition must be an object: {section_name}",
                section_path,
            )

        # 'classification' is the only required field.
        if "classification" not in section_def:
            return ValidationError(
                f"Section '{section_name}' missing required 'classification' field",
                section_path,
            )

        classification = section_def["classification"]
        valid_classifications = self._CLASSIFICATIONS
        if classification not in valid_classifications:
            return ValidationError(
                f"Section '{section_name}' has invalid classification '{classification}'. "
                f"Must be one of {valid_classifications}",
                f"{section_path}.classification",
            )

        # Optional fields are validated only when present.
        if "heading_level" in section_def:
            level = section_def["heading_level"]
            if not self._is_int(level) or level < 1 or level > 6:
                return ValidationError(
                    f"Section '{section_name}' heading_level must be integer 1-6, got {level}",
                    f"{section_path}.heading_level",
                )

        if "position" in section_def:
            position = section_def["position"]
            valid_positions = self._POSITIONS
            if position not in valid_positions:
                return ValidationError(
                    f"Section '{section_name}' has invalid position '{position}'. "
                    f"Must be one of {valid_positions}",
                    f"{section_path}.position",
                )

        # Content constraints must be non-negative integers.
        for constraint in self._SECTION_COUNT_CONSTRAINTS:
            if constraint in section_def:
                limit = section_def[constraint]
                if not self._is_int(limit) or limit < 0:
                    return ValidationError(
                        f"Section '{section_name}' {constraint} must be non-negative integer, got {limit}",
                        f"{section_path}.{constraint}",
                    )

        # 'alternatives' must be an array of strings.
        if "alternatives" in section_def:
            alternatives = section_def["alternatives"]
            if not isinstance(alternatives, list):
                return ValidationError(
                    f"Section '{section_name}' alternatives must be an array",
                    f"{section_path}.alternatives",
                )
            if not all(isinstance(alt, str) for alt in alternatives):
                return ValidationError(
                    f"Section '{section_name}' alternative names must be strings",
                    f"{section_path}.alternatives",
                )

        return None

    def _validate_content_control(
        self, value: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Validate x-markitect-content-control property.

        The value must be an object mapping section names to content
        control definitions. Returns the first error found, or None when
        every definition is well-formed.
        """
        if not isinstance(value, dict):
            return ValidationError(
                "x-markitect-content-control must be an object",
                property_name,
            )

        for section_name, control_def in value.items():
            error = self._validate_control_definition(
                section_name, control_def, property_name
            )
            if error is not None:
                return error

        return None

    def _validate_control_definition(
        self, section_name: Any, control_def: Any, property_name: str
    ) -> Optional["ValidationError"]:
        """Check one entry of x-markitect-content-control; return its first error."""
        control_path = f"{property_name}.{section_name}"

        if not isinstance(section_name, str):
            return ValidationError(
                f"Content control section name must be a string: {section_name}",
                control_path,
            )

        if not isinstance(control_def, dict):
            return ValidationError(
                f"Content control definition must be an object: {section_name}",
                control_path,
            )

        # Pattern lists: arrays of strings.
        for pattern_type in self._PATTERN_FIELDS:
            if pattern_type in control_def:
                patterns = control_def[pattern_type]
                if not isinstance(patterns, list):
                    return ValidationError(
                        f"Content control '{section_name}' {pattern_type} must be an array",
                        f"{control_path}.{pattern_type}",
                    )
                if not all(isinstance(pattern, str) for pattern in patterns):
                    return ValidationError(
                        f"Content control '{section_name}' pattern must be string",
                        f"{control_path}.{pattern_type}",
                    )

        # content_quality: object with count limits and a readability target.
        if "content_quality" in control_def:
            quality = control_def["content_quality"]
            if not isinstance(quality, dict):
                return ValidationError(
                    f"Content control '{section_name}' content_quality must be an object",
                    f"{control_path}.content_quality",
                )

            for count_field in self._QUALITY_COUNT_FIELDS:
                if count_field in quality:
                    count = quality[count_field]
                    if not self._is_int(count) or count < 0:
                        return ValidationError(
                            f"Content quality '{section_name}' {count_field} must be non-negative integer",
                            f"{control_path}.content_quality.{count_field}",
                        )

            if "readability_target" in quality:
                target = quality["readability_target"]
                valid_targets = self._READABILITY_TARGETS
                if target not in valid_targets:
                    return ValidationError(
                        f"Content quality '{section_name}' readability_target must be one of {valid_targets}",
                        f"{control_path}.content_quality.readability_target",
                    )

        # content_instructions: array of strings.
        if "content_instructions" in control_def:
            instructions = control_def["content_instructions"]
            if not isinstance(instructions, list):
                return ValidationError(
                    f"Content control '{section_name}' content_instructions must be an array",
                    f"{control_path}.content_instructions",
                )
            if not all(isinstance(item, str) for item in instructions):
                return ValidationError(
                    f"Content control '{section_name}' instruction must be string",
                    f"{control_path}.content_instructions",
                )

        # link_validation: object whose known flags are booleans.
        if "link_validation" in control_def:
            link_val = control_def["link_validation"]
            if not isinstance(link_val, dict):
                return ValidationError(
                    f"Content control '{section_name}' link_validation must be an object",
                    f"{control_path}.link_validation",
                )
            for field in self._LINK_FLAGS:
                if field in link_val and not isinstance(link_val[field], bool):
                    return ValidationError(
                        f"Content control '{section_name}' link_validation.{field} must be boolean",
                        f"{control_path}.link_validation.{field}",
                    )

        return None