generated from coulomb/repo-seed
Initial schemas and validation with extension workplan
This commit is contained in:
110
src/markitect_tool/schema/validator.py
Normal file
110
src/markitect_tool/schema/validator.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""Validate parsed Markdown documents against JSON Schema."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from jsonschema import Draft202012Validator, SchemaError, ValidationError
|
||||
|
||||
from markitect_tool.core import Document, parse_markdown_file
|
||||
from markitect_tool.schema.loader import MarkdownSchema, load_schema_file
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ValidationViolation:
    """One schema validation failure, stored as plain strings.

    Instances are immutable because the dataclass is frozen.
    """

    # Path reported by the validator for the failing value.
    path: str
    # Human-readable description of what went wrong.
    message: str
    # Path reported by the validator for the schema rule that failed.
    schema_path: str

    def to_dict(self) -> dict[str, str]:
        """Return the fields of this violation as a ``dict`` keyed by name."""
        serialized = asdict(self)
        return serialized
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SchemaValidationResult:
    """Outcome of validating a single document against a single schema.

    Instances are immutable because the dataclass is frozen.
    """

    # True when validation produced no violations.
    valid: bool
    # Every violation that was collected; empty when ``valid`` is True.
    violations: list[ValidationViolation]
    # Origin of the validated document, when one is known.
    document_path: str | None = None
    # Origin of the schema, when one is known.
    schema_path: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-``dict`` view of the result.

        Each violation is converted through its own ``to_dict``. Any entry
        whose value is ``None`` is left out of the returned mapping.
        """
        # Pairs are listed in the order the keys must appear in the output.
        entries = (
            ("valid", self.valid),
            ("violations", [item.to_dict() for item in self.violations]),
            ("document_path", self.document_path),
            ("schema_path", self.schema_path),
        )
        return {name: content for name, content in entries if content is not None}
|
||||
|
||||
|
||||
def validate_schema(schema: dict[str, Any]) -> SchemaValidationResult:
    """Check that ``schema`` is itself a well-formed JSON Schema.

    The schema is checked with ``Draft202012Validator.check_schema``.

    Args:
        schema: The raw JSON Schema mapping to check.

    Returns:
        A result with ``valid=True`` and no violations when the check passes.
        Otherwise a result with ``valid=False`` and a single violation built
        from the raised ``SchemaError``.
    """
    try:
        Draft202012Validator.check_schema(schema)
    except SchemaError as exc:
        # Report the schema error as an ordinary violation instead of
        # letting the exception propagate to the caller.
        problem = ValidationViolation(
            path=_format_path(exc.path),
            message=exc.message,
            schema_path=_format_path(exc.schema_path),
        )
        return SchemaValidationResult(valid=False, violations=[problem])
    else:
        return SchemaValidationResult(valid=True, violations=[])
|
||||
|
||||
|
||||
def validate_markdown_file(
    markdown_path: str | Path, schema_path: str | Path
) -> SchemaValidationResult:
    """Load a Markdown file and a schema file, then validate one against the other.

    Args:
        markdown_path: Location of the Markdown document to parse.
        schema_path: Location of the Markdown schema file to load.

    Returns:
        The result of ``validate_document`` for the parsed document and the
        loaded schema.
    """
    # Arguments are evaluated left to right, so the document is parsed
    # before the schema is loaded.
    return validate_document(
        parse_markdown_file(markdown_path),
        load_schema_file(schema_path),
    )
|
||||
|
||||
|
||||
def validate_document(
    document: Document, schema: MarkdownSchema | dict[str, Any]
) -> SchemaValidationResult:
    """Validate a parsed document against a schema.

    Args:
        document: The parsed document; its ``to_dict()`` output is what gets
            validated.
        schema: Either a loaded ``MarkdownSchema`` or a raw JSON Schema
            mapping. A schema path is only recorded for the former.

    Returns:
        A result carrying the document's source path and, when available,
        the schema's source path. If the schema itself is malformed, the
        result is invalid and holds the schema-check violations; the
        document is not validated in that case.
    """
    if isinstance(schema, MarkdownSchema):
        raw_schema = schema.schema
        schema_origin = schema.source_path
    else:
        raw_schema = schema
        schema_origin = None

    preflight = validate_schema(raw_schema)
    if not preflight.valid:
        # A malformed schema cannot be used for validation; report its
        # own violations instead.
        found = preflight.violations
        is_valid = False
    else:
        checker = Draft202012Validator(raw_schema)
        # Sorting by string form gives the errors a deterministic order.
        ordered_errors = sorted(checker.iter_errors(document.to_dict()), key=str)
        found = []
        for err in ordered_errors:
            found.append(
                ValidationViolation(
                    path=_format_path(err.path),
                    message=err.message,
                    schema_path=_format_path(err.schema_path),
                )
            )
        is_valid = not found

    return SchemaValidationResult(
        valid=is_valid,
        violations=found,
        document_path=document.source_path,
        schema_path=schema_origin,
    )
|
||||
|
||||
|
||||
def _format_path(path: Any) -> str:
    """Render an iterable of path segments as a ``$``-rooted dotted string.

    Each segment is converted with ``str``. An empty path yields ``"$"``;
    otherwise the segments follow the root, separated by dots, for example
    ``"$.items.0.name"``.
    """
    # Joining the root marker with the segments covers the empty case too:
    # a lone "$" has nothing to be separated from.
    return ".".join(["$", *map(str, path)])
|
||||
Reference in New Issue
Block a user