Initial schemas and validation with extension workplan

This commit is contained in:
2026-05-03 22:12:46 +02:00
parent b96b1fb745
commit 8c9129c371
15 changed files with 1025 additions and 2 deletions

View File

@@ -0,0 +1,110 @@
"""Validate parsed Markdown documents against JSON Schema."""
from __future__ import annotations
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any
from jsonschema import Draft202012Validator, SchemaError, ValidationError
from markitect_tool.core import Document, parse_markdown_file
from markitect_tool.schema.loader import MarkdownSchema, load_schema_file
@dataclass(frozen=True)
class ValidationViolation:
    """Immutable record describing one problem found during validation.

    Instances are created in this module from jsonschema error objects; the
    two path fields hold strings produced by ``_format_path``.
    """

    # Location of the offending value, rendered as a "$"-rooted dotted path.
    path: str
    # Message text taken from the underlying validator error.
    message: str
    # Location of the failing keyword inside the schema, same path format.
    schema_path: str

    def to_dict(self) -> dict[str, str]:
        """Return the violation as a plain dict keyed by field name."""
        return {
            "path": self.path,
            "message": self.message,
            "schema_path": self.schema_path,
        }
@dataclass(frozen=True)
class SchemaValidationResult:
    """Outcome of validating one document (or one schema) against a schema."""

    # True when no violations were found.
    valid: bool
    # Every violation collected during validation; empty when ``valid``.
    violations: list[ValidationViolation]
    # Source of the validated document, when one was supplied.
    document_path: str | None = None
    # Source of the schema, when one was supplied.
    schema_path: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the result.

        Violations are converted with their own ``to_dict``. Any entry whose
        value is ``None`` is left out of the returned mapping.
        """
        serialized = [item.to_dict() for item in self.violations]
        candidates = (
            ("valid", self.valid),
            ("violations", serialized),
            ("document_path", self.document_path),
            ("schema_path", self.schema_path),
        )
        return {name: value for name, value in candidates if value is not None}
def validate_schema(schema: dict[str, Any]) -> SchemaValidationResult:
    """Check that *schema* is itself a well-formed JSON Schema.

    The schema is checked with ``Draft202012Validator.check_schema``. A
    ``SchemaError`` is not propagated: it is converted into a single
    violation and returned in a result with ``valid=False``. Otherwise a
    result with ``valid=True`` and no violations is returned.
    """
    try:
        Draft202012Validator.check_schema(schema)
    except SchemaError as schema_error:
        problem = ValidationViolation(
            path=_format_path(schema_error.path),
            message=schema_error.message,
            schema_path=_format_path(schema_error.schema_path),
        )
        return SchemaValidationResult(valid=False, violations=[problem])
    else:
        return SchemaValidationResult(valid=True, violations=[])
def validate_markdown_file(
    markdown_path: str | Path, schema_path: str | Path
) -> SchemaValidationResult:
    """Validate the Markdown file at *markdown_path* against a schema file.

    The document is parsed with ``parse_markdown_file`` first, then the
    schema is loaded with ``load_schema_file``; both are handed to
    ``validate_document``, whose result is returned unchanged.
    """
    return validate_document(
        parse_markdown_file(markdown_path),
        load_schema_file(schema_path),
    )
def validate_document(
    document: Document, schema: MarkdownSchema | dict[str, Any]
) -> SchemaValidationResult:
    """Validate a parsed document against a loaded or raw JSON Schema.

    *schema* may be a ``MarkdownSchema`` (its ``schema`` and ``source_path``
    attributes are used) or a plain schema dict (no schema path is recorded).

    The schema is checked with ``validate_schema`` before use. If it is
    malformed, the schema's own violations are returned and the document is
    not validated. Otherwise ``document.to_dict()`` is validated and every
    error becomes a violation, ordered by the errors' string form.
    """
    if isinstance(schema, MarkdownSchema):
        json_schema = schema.schema
        schema_source = schema.source_path
    else:
        json_schema = schema
        schema_source = None

    preflight = validate_schema(json_schema)
    if not preflight.valid:
        # A broken schema cannot be used; report why instead of validating.
        return SchemaValidationResult(
            valid=False,
            violations=preflight.violations,
            document_path=document.source_path,
            schema_path=schema_source,
        )

    checker = Draft202012Validator(json_schema)
    ordered_errors = list(checker.iter_errors(document.to_dict()))
    ordered_errors.sort(key=str)
    found = [
        ValidationViolation(
            path=_format_path(err.path),
            message=err.message,
            schema_path=_format_path(err.schema_path),
        )
        for err in ordered_errors
    ]
    return SchemaValidationResult(
        valid=not found,
        violations=found,
        document_path=document.source_path,
        schema_path=schema_source,
    )
def _format_path(path: Any) -> str:
parts = [str(part) for part in path]
return "$" if not parts else "$." + ".".join(parts)