generated from coulomb/repo-seed
111 lines
3.4 KiB
Python
111 lines
3.4 KiB
Python
"""Validate parsed Markdown documents against JSON Schema."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import asdict, dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from jsonschema import Draft202012Validator, SchemaError, ValidationError
|
|
|
|
from markitect_tool.core import Document, parse_markdown_file
|
|
from markitect_tool.schema.loader import MarkdownSchema, load_schema_file
|
|
|
|
|
|
@dataclass(frozen=True)
class ValidationViolation:
    """A single schema validation violation.

    Instances are immutable (``frozen=True``).
    """

    # Formatted location of the offending value, e.g. ``$`` or ``$.a.b``.
    path: str
    # Error message reported by the validator.
    message: str
    # Formatted location, within the schema, of the rule that failed.
    schema_path: str

    def to_dict(self) -> dict[str, str]:
        """Return the violation as a plain ``dict`` keyed by field name."""
        return asdict(self)
|
|
|
|
|
|
@dataclass(frozen=True)
class SchemaValidationResult:
    """Validation result for one document and one schema.

    The dataclass is frozen, so fields cannot be reassigned after
    construction; ``violations`` itself is still an ordinary list.
    """

    # True when no violations were found.
    valid: bool
    # Every violation collected during validation (empty when valid).
    violations: list[ValidationViolation]
    # Source location of the validated document, when known.
    document_path: str | None = None
    # Source location of the schema, when known.
    schema_path: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain ``dict``.

        Each violation is converted with its own ``to_dict()``. Keys whose
        value is ``None`` (an unset ``document_path`` / ``schema_path``) are
        omitted from the output.
        """
        data: dict[str, Any] = {
            "valid": self.valid,
            "violations": [violation.to_dict() for violation in self.violations],
            "document_path": self.document_path,
            "schema_path": self.schema_path,
        }
        # Drop unset optional fields so the output only carries known values.
        return {key: value for key, value in data.items() if value is not None}
|
|
|
|
|
|
def validate_schema(schema: dict[str, Any]) -> SchemaValidationResult:
    """Validate that a JSON Schema itself is well formed.

    The schema is checked with ``Draft202012Validator.check_schema``.

    Args:
        schema: The raw JSON Schema to check.

    Returns:
        A result with ``valid=True`` and no violations when the check passes.
        When ``check_schema`` raises ``SchemaError``, a result with
        ``valid=False`` and a single violation built from that error's
        ``path``, ``message`` and ``schema_path``. ``document_path`` and
        ``schema_path`` on the result are left unset.
    """
    try:
        Draft202012Validator.check_schema(schema)
    except SchemaError as exc:
        violation = ValidationViolation(
            path=_format_path(exc.path),
            message=exc.message,
            schema_path=_format_path(exc.schema_path),
        )
        return SchemaValidationResult(valid=False, violations=[violation])
    return SchemaValidationResult(valid=True, violations=[])
|
|
|
|
|
|
def validate_markdown_file(
    markdown_path: str | Path, schema_path: str | Path
) -> SchemaValidationResult:
    """Parse and validate a Markdown file against a Markdown schema file.

    Args:
        markdown_path: Location of the Markdown file, passed to
            ``parse_markdown_file``.
        schema_path: Location of the schema file, passed to
            ``load_schema_file``.

    Returns:
        The result of ``validate_document`` for the parsed document and the
        loaded schema.
    """
    document = parse_markdown_file(markdown_path)
    loaded_schema = load_schema_file(schema_path)
    return validate_document(document, loaded_schema)
|
|
|
|
|
|
def validate_document(
    document: Document, schema: MarkdownSchema | dict[str, Any]
) -> SchemaValidationResult:
    """Validate a parsed document against a loaded or raw JSON Schema.

    Args:
        document: The parsed document; its ``to_dict()`` output is what gets
            validated and its ``source_path`` is recorded on the result.
        schema: Either a ``MarkdownSchema`` (its ``schema`` and
            ``source_path`` attributes are used) or a raw JSON Schema dict
            (no schema path is recorded).

    Returns:
        A result carrying the document and schema paths. If the schema itself
        fails ``validate_schema``, the result is invalid and carries the
        schema's violations; the document is not validated in that case.
        Otherwise the result lists one violation per validation error and is
        valid only when there are none.
    """
    if isinstance(schema, MarkdownSchema):
        raw_schema = schema.schema
        schema_path = schema.source_path
    else:
        raw_schema = schema
        schema_path = None

    # Reject a malformed schema up front, before building a validator from it.
    schema_check = validate_schema(raw_schema)
    if not schema_check.valid:
        return SchemaValidationResult(
            valid=False,
            violations=schema_check.violations,
            document_path=document.source_path,
            schema_path=schema_path,
        )

    validator = Draft202012Validator(raw_schema)
    # Sort by the error's string form so the violation order is deterministic.
    errors = sorted(validator.iter_errors(document.to_dict()), key=str)
    violations = [
        ValidationViolation(
            path=_format_path(error.path),
            message=error.message,
            schema_path=_format_path(error.schema_path),
        )
        for error in errors
    ]
    return SchemaValidationResult(
        valid=not violations,
        violations=violations,
        document_path=document.source_path,
        schema_path=schema_path,
    )
|
|
|
|
|
|
def _format_path(path: Any) -> str:
|
|
parts = [str(part) for part in path]
|
|
return "$" if not parts else "$." + ".".join(parts)
|