"""Validate parsed Markdown documents against JSON Schema.""" from __future__ import annotations from dataclasses import asdict, dataclass from pathlib import Path from typing import Any from jsonschema import Draft202012Validator, SchemaError, ValidationError from markitect_tool.core import Document, parse_markdown_file from markitect_tool.schema.loader import MarkdownSchema, load_schema_file @dataclass(frozen=True) class ValidationViolation: """A single schema validation violation.""" path: str message: str schema_path: str def to_dict(self) -> dict[str, str]: return asdict(self) @dataclass(frozen=True) class SchemaValidationResult: """Validation result for one document and one schema.""" valid: bool violations: list[ValidationViolation] document_path: str | None = None schema_path: str | None = None def to_dict(self) -> dict[str, Any]: data = { "valid": self.valid, "violations": [violation.to_dict() for violation in self.violations], "document_path": self.document_path, "schema_path": self.schema_path, } return {key: value for key, value in data.items() if value is not None} def validate_schema(schema: dict[str, Any]) -> SchemaValidationResult: """Validate that a JSON Schema itself is well formed.""" try: Draft202012Validator.check_schema(schema) except SchemaError as exc: return SchemaValidationResult( valid=False, violations=[ ValidationViolation( path=_format_path(exc.path), message=exc.message, schema_path=_format_path(exc.schema_path), ) ], ) return SchemaValidationResult(valid=True, violations=[]) def validate_markdown_file( markdown_path: str | Path, schema_path: str | Path ) -> SchemaValidationResult: """Parse and validate a Markdown file against a Markdown schema file.""" document = parse_markdown_file(markdown_path) loaded_schema = load_schema_file(schema_path) return validate_document(document, loaded_schema) def validate_document( document: Document, schema: MarkdownSchema | dict[str, Any] ) -> SchemaValidationResult: """Validate a parsed document against a loaded or raw JSON Schema.""" raw_schema = schema.schema if isinstance(schema, MarkdownSchema) else schema schema_path = schema.source_path if isinstance(schema, MarkdownSchema) else None schema_check = validate_schema(raw_schema) if not schema_check.valid: return SchemaValidationResult( valid=False, violations=schema_check.violations, document_path=document.source_path, schema_path=schema_path, ) validator = Draft202012Validator(raw_schema) violations = [ ValidationViolation( path=_format_path(error.path), message=error.message, schema_path=_format_path(error.schema_path), ) for error in sorted(validator.iter_errors(document.to_dict()), key=str) ] return SchemaValidationResult( valid=not violations, violations=violations, document_path=document.source_path, schema_path=schema_path, ) def _format_path(path: Any) -> str: parts = [str(part) for part in path] return "$" if not parts else "$." + ".".join(parts)