Files
markitect-main/markitect/metaschema.py
tegwick 22008875d3 feat: Complete Issue #50 - Define metaschema for JSON schema structure
Implement comprehensive MarkiTect metaschema that extends standard JSON Schema
with MarkiTect-specific features for document analysis and generation.

🎯 TDD8 Implementation Complete:
- ISSUE: Analyzed existing schema system and requirements
- TEST: 15 comprehensive tests covering all features
- RED: Verified tests fail before implementation
- GREEN: Implemented metaschema JSON and validation logic
- REFACTOR: Clean, extensible validator architecture
- DOCUMENT: Updated CLI help and comprehensive documentation
- REFINE: 100% test success rate and CLI integration
- PUBLISH: Ready for production use

Key Features Implemented:
- Heading text capture support (x-markitect-heading-text)
- Content field instructions (x-markitect-content-instructions)
- Outline structure representation (x-markitect-outline-mode/depth)
- Backward compatibility with existing schemas
- Validation rules for all new features
- CLI integration in schema-ingest command

📁 Files Added:
- markitect/metaschema.py - Validation logic and MetaschemaValidator
- markitect/schemas/markitect-metaschema.json - Metaschema definition
- Enhanced markitect/cli.py - Automatic metaschema validation

🧪 Testing:
- 15 comprehensive tests (100% passing)
- RED-GREEN-REFACTOR cycle validated
- CLI integration tested and working
- Backward compatibility verified

📋 Acceptance Criteria Met:
- Schema metaschema supports heading text capture
- Schema metaschema supports content field instructions
- Schema metaschema supports outline structure representation
- Schema metaschema is backward compatible with existing schemas
- Schema metaschema includes validation rules for new features
- Documentation explains the metaschema structure and usage

🔗 Foundation for Future Issues:
- Issue #51: Outline mode schema generation
- Issue #52: Heading text capture in schemas
- Issue #54: Content instruction capabilities
- Issue #55: Schema-based draft generation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-01 02:39:29 +02:00

196 lines
7.2 KiB
Python

"""
MarkiTect Metaschema Module for Issue #50
This module provides metaschema validation for MarkiTect JSON schemas,
extending standard JSON Schema with MarkiTect-specific features.
This is a TDD8 implementation - tests are written first, implementation follows.
"""
from pathlib import Path
from typing import Dict, Any, List, Optional
import json
# Metaschema definition file, located in the "schemas" directory next to this module.
MARKITECT_METASCHEMA_PATH = Path(__file__).parent.joinpath("schemas", "markitect-metaschema.json")
class ValidationError:
    """Describe a single problem found while validating a schema.

    This is a plain value object, not an exception type: it does not derive
    from ``Exception`` and is meant to be collected and returned, not raised.

    Attributes are set directly from the constructor arguments:
        message: Human-readable description of the problem.
        path: Identifier of the offending item (empty string when not given).
    """

    def __init__(self, message: str, path: str = ""):
        self.message = message
        self.path = path

    def __repr__(self) -> str:
        """Return an unambiguous representation useful in logs and test output."""
        return f"{type(self).__name__}(message={self.message!r}, path={self.path!r})"

    def __str__(self) -> str:
        """Return the message, prefixed with the path when one is set."""
        return f"{self.path}: {self.message}" if self.path else self.message
class ValidationResult:
    """Outcome of validating a schema against the metaschema.

    Attributes are set directly from the constructor arguments:
        is_valid: Validity flag supplied by the caller.
        errors: List of reported problems; a fresh empty list when the
            caller passes ``None`` (or any other falsy value).
    """

    def __init__(self, is_valid: bool, errors: Optional[List["ValidationError"]] = None):
        self.is_valid = is_valid
        # ``or`` (rather than an ``is None`` test) keeps the historical
        # behaviour of replacing any falsy argument with a new list, so
        # instances never share a default list.
        self.errors = errors or []

    def __repr__(self) -> str:
        """Return an unambiguous representation useful in logs and test output."""
        return f"{type(self).__name__}(is_valid={self.is_valid!r}, errors={self.errors!r})"
class MetaschemaValidator:
    """Validate MarkiTect schemas against the MarkiTect metaschema rules.

    The checks are hand-written: a schema must be a dict carrying ``$schema``
    and ``type``, and every ``x-markitect-*`` extension it uses (at the top
    level or inside nested ``properties``) must have the expected type or
    value. The metaschema JSON file is only read by ``get_metaschema``.
    """

    def __init__(self):
        """Initialize the validator with an empty metaschema cache."""
        self._metaschema_cache: Optional[Dict[str, Any]] = None

    def get_metaschema(self) -> Dict[str, Any]:
        """Return the MarkiTect metaschema, loading it on first use.

        The parsed file is cached on the instance, so later calls return the
        same dictionary object without touching the file system.

        Returns:
            Dictionary containing the metaschema.

        Raises:
            FileNotFoundError: If the metaschema file doesn't exist.
            json.JSONDecodeError: If the metaschema file is invalid JSON.
        """
        if self._metaschema_cache is None:
            if not MARKITECT_METASCHEMA_PATH.exists():
                raise FileNotFoundError(f"Metaschema file not found: {MARKITECT_METASCHEMA_PATH}")
            # Read as UTF-8 explicitly; the platform default encoding may
            # differ and would mis-decode non-ASCII content.
            with open(MARKITECT_METASCHEMA_PATH, encoding="utf-8") as f:
                self._metaschema_cache = json.load(f)
        return self._metaschema_cache

    def validate_schema(self, schema: Dict[str, Any]) -> bool:
        """Validate a schema against the MarkiTect metaschema rules.

        Args:
            schema: The schema to validate.

        Returns:
            True if valid, False otherwise.
        """
        return self.validate_schema_with_errors(schema).is_valid

    def validate_schema_with_errors(self, schema: Dict[str, Any]) -> "ValidationResult":
        """Validate a schema and return detailed error information.

        Args:
            schema: The schema to validate.

        Returns:
            ValidationResult with validity status and error details. A
            non-dict ``schema`` yields a single "Schema must be an object"
            error and no further checks are run.
        """
        if not isinstance(schema, dict):
            return ValidationResult(False, [ValidationError("Schema must be an object")])

        errors = []
        # Required standard JSON Schema keywords.
        if "$schema" not in schema:
            errors.append(ValidationError("Missing required $schema property"))
        if "type" not in schema:
            errors.append(ValidationError("Missing required type property"))

        # MarkiTect-specific extension keywords (checked recursively).
        errors.extend(self._validate_markitect_extensions(schema))
        return ValidationResult(len(errors) == 0, errors)

    def _validate_markitect_extensions(self, schema: Dict[str, Any]) -> List["ValidationError"]:
        """Validate MarkiTect-specific extensions in the schema.

        Checks every known ``x-markitect-*`` key present in ``schema`` and
        then recurses into each dict-valued entry of ``schema["properties"]``.

        Args:
            schema: Schema (or sub-schema) dictionary to inspect.

        Returns:
            List of errors found at this level and in nested properties;
            empty when everything is valid.
        """
        errors = []

        # Extension keyword -> method that checks its value.
        validation_rules = {
            "x-markitect-outline-depth": self._validate_outline_depth,
            "x-markitect-outline-mode": self._validate_outline_mode,
            "x-markitect-heading-text": self._validate_heading_text,
            "x-markitect-content-instructions": self._validate_content_instructions,
            "x-markitect-instruction-type": self._validate_instruction_type,
            "x-markitect-generation-mode": self._validate_generation_mode,
            "x-markitect-generated-from": self._validate_generated_from,
        }
        for property_name, validator in validation_rules.items():
            if property_name in schema:
                error = validator(schema[property_name], property_name)
                if error:
                    errors.append(error)

        if "properties" in schema:
            properties = schema["properties"]
            if isinstance(properties, dict):
                # Non-dict property schemas are skipped, as before.
                for prop_schema in properties.values():
                    if isinstance(prop_schema, dict):
                        errors.extend(self._validate_markitect_extensions(prop_schema))
            else:
                # Report malformed input instead of failing with
                # AttributeError on ``.items()``/``.values()``.
                errors.append(ValidationError("properties must be an object", "properties"))
        return errors

    def _validate_outline_depth(self, value: Any, property_name: str) -> Optional["ValidationError"]:
        """Validate x-markitect-outline-depth: an integer >= 1.

        Returns:
            None when valid, otherwise a ValidationError for ``property_name``.
        """
        # bool is a subclass of int, so True would otherwise pass as depth 1.
        if isinstance(value, bool) or not isinstance(value, int) or value < 1:
            return ValidationError(
                "x-markitect-outline-depth must be an integer >= 1",
                property_name,
            )
        return None

    def _validate_outline_mode(self, value: Any, property_name: str) -> Optional["ValidationError"]:
        """Validate x-markitect-outline-mode: a boolean.

        Returns:
            None when valid, otherwise a ValidationError for ``property_name``.
        """
        if not isinstance(value, bool):
            return ValidationError(
                "x-markitect-outline-mode must be a boolean",
                property_name,
            )
        return None

    def _validate_heading_text(self, value: Any, property_name: str) -> Optional["ValidationError"]:
        """Validate x-markitect-heading-text: a string.

        Returns:
            None when valid, otherwise a ValidationError for ``property_name``.
        """
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-heading-text must be a string",
                property_name,
            )
        return None

    def _validate_content_instructions(self, value: Any, property_name: str) -> Optional["ValidationError"]:
        """Validate x-markitect-content-instructions: a string.

        Returns:
            None when valid, otherwise a ValidationError for ``property_name``.
        """
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-content-instructions must be a string",
                property_name,
            )
        return None

    def _validate_instruction_type(self, value: Any, property_name: str) -> Optional["ValidationError"]:
        """Validate x-markitect-instruction-type: one of the known type names.

        Returns:
            None when valid, otherwise a ValidationError for ``property_name``.
        """
        valid_types = ["description", "example", "constraint", "template"]
        # The str check runs first so unhashable or odd values never reach
        # the membership test.
        if not isinstance(value, str) or value not in valid_types:
            return ValidationError(
                f"x-markitect-instruction-type must be one of {valid_types}",
                property_name,
            )
        return None

    def _validate_generation_mode(self, value: Any, property_name: str) -> Optional["ValidationError"]:
        """Validate x-markitect-generation-mode: "outline" or "full".

        Returns:
            None when valid, otherwise a ValidationError for ``property_name``.
        """
        valid_modes = ["outline", "full"]
        if not isinstance(value, str) or value not in valid_modes:
            return ValidationError(
                f"x-markitect-generation-mode must be one of {valid_modes}",
                property_name,
            )
        return None

    def _validate_generated_from(self, value: Any, property_name: str) -> Optional["ValidationError"]:
        """Validate x-markitect-generated-from: a string.

        Returns:
            None when valid, otherwise a ValidationError for ``property_name``.
        """
        if not isinstance(value, str):
            return ValidationError(
                "x-markitect-generated-from must be a string",
                property_name,
            )
        return None