generated from coulomb/repo-seed
Initial schemas and validation with extension workplan
This commit is contained in:
@@ -9,6 +9,14 @@ from markitect_tool.core import (
|
||||
parse_markdown,
|
||||
parse_markdown_file,
|
||||
)
|
||||
from markitect_tool.schema import (
|
||||
MarkdownSchema,
|
||||
SchemaValidationResult,
|
||||
ValidationViolation,
|
||||
load_schema_file,
|
||||
validate_document,
|
||||
validate_markdown_file,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ContentBlock",
|
||||
@@ -18,4 +26,10 @@ __all__ = [
|
||||
"Section",
|
||||
"parse_markdown",
|
||||
"parse_markdown_file",
|
||||
"MarkdownSchema",
|
||||
"SchemaValidationResult",
|
||||
"ValidationViolation",
|
||||
"load_schema_file",
|
||||
"validate_document",
|
||||
"validate_markdown_file",
|
||||
]
|
||||
|
||||
@@ -9,6 +9,7 @@ import click
|
||||
import yaml
|
||||
|
||||
from markitect_tool.core import parse_markdown_file
|
||||
from markitect_tool.schema import load_schema_file, validate_markdown_file, validate_schema
|
||||
|
||||
|
||||
@click.group()
|
||||
@@ -40,5 +41,66 @@ def parse(file: Path, output_format: str) -> None:
|
||||
click.echo(json.dumps(data, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
@main.command()
@click.argument(
    "file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option(
    "--schema",
    "schema_file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    required=True,
)
@click.option(
    "--format",
    "output_format",
    default="text",
    show_default=True,
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
)
def validate(file: Path, schema_file: Path, output_format: str) -> None:
    """Validate a Markdown file against a Markdown schema file."""
    # NOTE: the docstring above doubles as the command's help text, so it is
    # kept short; details live in these comments instead.
    outcome = validate_markdown_file(file, schema_file)
    _emit_result(outcome.to_dict(), output_format)

    # Report the outcome through the exit status: 0 when valid, 1 otherwise.
    exit_code = 0 if outcome.valid else 1
    raise click.exceptions.Exit(exit_code)
|
||||
|
||||
|
||||
@main.group()
def schema() -> None:
    """Work with Markdown schema files."""
    # Deliberately empty: the group only serves as a parent for subcommands
    # registered with ``@schema.command(...)``.
|
||||
|
||||
|
||||
@schema.command("validate")
@click.argument(
    "schema_file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option(
    "--format",
    "output_format",
    default="text",
    show_default=True,
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
)
def schema_validate(schema_file: Path, output_format: str) -> None:
    """Validate that a Markdown schema contains a well-formed JSON Schema."""
    markdown_schema = load_schema_file(schema_file)
    outcome = validate_schema(markdown_schema.schema)

    # The result's own dict is extended with (or overridden by) the path of
    # the file that was checked, so the report always names it.
    report = {**outcome.to_dict(), "schema_path": str(schema_file)}
    _emit_result(report, output_format)

    # Exit status mirrors the outcome: 0 when well formed, 1 otherwise.
    raise click.exceptions.Exit(0 if outcome.valid else 1)
|
||||
|
||||
|
||||
def _emit_result(data: dict, output_format: str) -> None:
    """Print a validation result dict in the requested output format.

    Args:
        data: Result mapping; the text report reads its ``valid`` and
            ``violations`` entries.
        output_format: ``"json"`` or ``"yaml"`` for a structured dump; any
            other value selects the plain-text report.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return

    if output_format == "yaml":
        # sort_keys=False keeps the keys in the order the dict was built.
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    # Plain-text report: a one-word verdict, then one line per violation.
    if data.get("valid"):
        click.echo("valid")
        return

    click.echo("invalid")
    for entry in data.get("violations", []):
        click.echo(f"- {entry['path']}: {entry['message']}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
31
src/markitect_tool/schema/__init__.py
Normal file
31
src/markitect_tool/schema/__init__.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""Schema loading and validation for structured Markdown documents."""
|
||||
|
||||
from markitect_tool.schema.loader import (
|
||||
InvalidSchemaFormatError,
|
||||
MarkdownSchema,
|
||||
SchemaLoaderError,
|
||||
SchemaNotFoundError,
|
||||
load_schema_file,
|
||||
load_schema_text,
|
||||
)
|
||||
from markitect_tool.schema.validator import (
|
||||
SchemaValidationResult,
|
||||
ValidationViolation,
|
||||
validate_document,
|
||||
validate_markdown_file,
|
||||
validate_schema,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"InvalidSchemaFormatError",
|
||||
"MarkdownSchema",
|
||||
"SchemaLoaderError",
|
||||
"SchemaNotFoundError",
|
||||
"SchemaValidationResult",
|
||||
"ValidationViolation",
|
||||
"load_schema_file",
|
||||
"load_schema_text",
|
||||
"validate_document",
|
||||
"validate_markdown_file",
|
||||
"validate_schema",
|
||||
]
|
||||
124
src/markitect_tool/schema/loader.py
Normal file
124
src/markitect_tool/schema/loader.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""Load JSON Schema definitions embedded in Markdown schema files."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
class SchemaLoaderError(ValueError):
    """Root of the schema-loading exception hierarchy.

    Derives from ``ValueError``, so handlers written for ``ValueError`` also
    catch it and its subclasses.
    """
|
||||
|
||||
|
||||
class SchemaNotFoundError(SchemaLoaderError):
    """Signals that the Markdown text holds no JSON block to read a schema from."""
|
||||
|
||||
|
||||
class InvalidSchemaFormatError(SchemaLoaderError):
    """Signals schema content that is present but malformed.

    Used for a JSON block that fails to parse or does not hold an object, and
    for frontmatter that is not valid YAML or not a mapping.
    """
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MarkdownSchema:
    """Frozen record holding a JSON Schema read from a Markdown document."""

    # The JSON Schema object itself.
    schema: dict[str, Any]
    # Mapping parsed from the document's frontmatter (empty when there is none).
    metadata: dict[str, Any]
    # Markdown text of the document with the frontmatter removed.
    documentation: str
    # Where the document came from, when known.
    source_path: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict, leaving out any that are ``None``."""
        pairs = (
            ("schema", self.schema),
            ("metadata", self.metadata),
            ("documentation", self.documentation),
            ("source_path", self.source_path),
        )
        return {name: value for name, value in pairs if value is not None}
|
||||
|
||||
|
||||
# Matches a fenced code block opened with ```json (any letter case) and
# captures everything up to the next ``` fence. The negative lookahead rejects
# longer info strings such as "json5", "jsonc" or "jsonl", whose contents are
# not strict JSON and would otherwise abort schema loading with a parse error.
_JSON_BLOCK_RE = re.compile(r"```json(?![\w+-])\s*(.*?)```", re.DOTALL | re.IGNORECASE)
|
||||
|
||||
|
||||
def load_schema_file(path: str | Path) -> MarkdownSchema:
    """Read a Markdown schema document from disk and load the schema it embeds.

    Args:
        path: Location of the Markdown schema file.

    Returns:
        The result of ``load_schema_text`` for the file's UTF-8 text, with
        ``source_path`` set to the string form of ``path``.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
    """
    location = Path(path)
    if location.exists():
        contents = location.read_text(encoding="utf-8")
        return load_schema_text(contents, source_path=str(location))
    raise FileNotFoundError(f"Schema file not found: {location}")
|
||||
|
||||
|
||||
def load_schema_text(text: str, source_path: str | None = None) -> MarkdownSchema:
    """Build a ``MarkdownSchema`` from the text of a Markdown schema document.

    The frontmatter (if any) is split off first; the JSON Schema is then
    extracted from the remaining Markdown.

    Args:
        text: Full text of the Markdown schema document.
        source_path: Optional origin of the text, recorded on the result.

    Returns:
        The loaded schema together with its frontmatter metadata and the
        Markdown body.
    """
    frontmatter, body = _split_frontmatter(text)

    # Work on a shallow copy so the extracted dict itself is left untouched.
    annotated = dict(_extract_json_schema(body))

    # Record provenance unless the schema already carries its own entry.
    if "x-markitect-source" not in annotated:
        annotated["x-markitect-source"] = {
            "format": "markdown",
            "file": source_path,
            "frontmatter": frontmatter,
        }

    return MarkdownSchema(
        schema=annotated,
        metadata=frontmatter,
        documentation=body,
        source_path=source_path,
    )
|
||||
|
||||
|
||||
def _split_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Split a leading YAML frontmatter block from the rest of the text.

    Frontmatter is recognised only when the text starts with a ``---`` line
    and a later line starts with ``---``.

    Args:
        text: Full document text.

    Returns:
        ``(metadata, body)``. ``metadata`` is the parsed frontmatter mapping
        (empty when the block is absent, unterminated, or blank) and ``body``
        is the text after the closing fence line. When no frontmatter is
        recognised the text is returned unchanged.

    Raises:
        InvalidSchemaFormatError: If the frontmatter is not valid YAML or does
            not parse to a mapping.
    """
    if not text.startswith("---\n"):
        return {}, text

    # Start at index 3, the newline that ends the opening fence, so that an
    # empty block ("---\n---\n") is recognised. Starting at 4 would skip that
    # newline and either miss the closing fence or pick up a later "---" line.
    end = text.find("\n---", 3)
    if end == -1:
        return {}, text

    # The body begins after the newline that terminates the closing fence line
    # (or at end of text when the fence is the last line).
    closing_end = text.find("\n", end + 4)
    closing_end = len(text) if closing_end == -1 else closing_end + 1

    raw = text[4:end]
    try:
        metadata = yaml.safe_load(raw) if raw.strip() else {}
    except yaml.YAMLError as exc:
        raise InvalidSchemaFormatError(f"Invalid schema frontmatter: {exc}") from exc

    # YAML that contains only comments parses to None; treat it as empty.
    if metadata is None:
        metadata = {}
    if not isinstance(metadata, dict):
        raise InvalidSchemaFormatError("Schema frontmatter must be a mapping")
    return metadata, text[closing_end:]
|
||||
|
||||
|
||||
def _extract_json_schema(text: str) -> dict[str, Any]:
    """Pick the JSON Schema object out of the JSON blocks found in ``text``.

    Every matched block must parse as a JSON object. The first block that has
    a ``$schema`` or ``type`` key is returned; if none has either key, the
    first block is returned.

    Raises:
        SchemaNotFoundError: If ``text`` has no JSON block.
        InvalidSchemaFormatError: If any block is not valid JSON or is not an
            object.
    """
    # The pattern has a single group, so findall() yields each block's contents.
    bodies = _JSON_BLOCK_RE.findall(text)
    if not bodies:
        raise SchemaNotFoundError("No JSON schema found in markdown schema")

    objects: list[dict[str, Any]] = []
    for body in bodies:
        try:
            parsed = json.loads(body.strip())
        except json.JSONDecodeError as exc:
            raise InvalidSchemaFormatError(f"Invalid JSON schema block: {exc}") from exc
        if not isinstance(parsed, dict):
            raise InvalidSchemaFormatError("JSON schema block must contain an object")
        objects.append(parsed)

    # Prefer a block that looks like a schema; otherwise fall back to the first.
    return next(
        (candidate for candidate in objects if "$schema" in candidate or "type" in candidate),
        objects[0],
    )
|
||||
110
src/markitect_tool/schema/validator.py
Normal file
110
src/markitect_tool/schema/validator.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""Validate parsed Markdown documents against JSON Schema."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from jsonschema import Draft202012Validator, SchemaError, ValidationError
|
||||
|
||||
from markitect_tool.core import Document, parse_markdown_file
|
||||
from markitect_tool.schema.loader import MarkdownSchema, load_schema_file
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ValidationViolation:
    """One problem reported while validating against a JSON Schema."""

    # Formatted location of the offending value.
    path: str
    # Description of the problem.
    message: str
    # Formatted location of the schema rule that was violated.
    schema_path: str

    def to_dict(self) -> dict[str, str]:
        """Return the violation's fields as a plain dict."""
        as_mapping = asdict(self)
        return as_mapping
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SchemaValidationResult:
    """Outcome of validating one document (or one schema) against a schema."""

    # True when no violations were found.
    valid: bool
    # The violations that were found.
    violations: list[ValidationViolation]
    # Origin of the validated document, when known.
    document_path: str | None = None
    # Origin of the schema, when known.
    schema_path: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form, leaving out any entry whose value is ``None``."""
        pairs = (
            ("valid", self.valid),
            ("violations", [item.to_dict() for item in self.violations]),
            ("document_path", self.document_path),
            ("schema_path", self.schema_path),
        )
        return {name: value for name, value in pairs if value is not None}
|
||||
|
||||
|
||||
def validate_schema(schema: dict[str, Any]) -> SchemaValidationResult:
    """Check that ``schema`` is itself a well-formed JSON Schema.

    The check is done with ``Draft202012Validator.check_schema``.

    Returns:
        A valid result with no violations, or an invalid result carrying one
        violation built from the ``SchemaError`` that was raised.
    """
    try:
        Draft202012Validator.check_schema(schema)
    except SchemaError as exc:
        problem = ValidationViolation(
            path=_format_path(exc.path),
            message=exc.message,
            schema_path=_format_path(exc.schema_path),
        )
        return SchemaValidationResult(valid=False, violations=[problem])
    else:
        return SchemaValidationResult(valid=True, violations=[])
|
||||
|
||||
|
||||
def validate_markdown_file(
    markdown_path: str | Path,
    schema_path: str | Path,
) -> SchemaValidationResult:
    """Validate the Markdown file at ``markdown_path`` against a schema file.

    Args:
        markdown_path: Markdown document to parse and validate.
        schema_path: Markdown schema file to load the JSON Schema from.

    Returns:
        The result of validating the parsed document against the loaded schema.
    """
    # The document is parsed before the schema is loaded, so a failure in
    # parsing surfaces first.
    parsed = parse_markdown_file(markdown_path)
    return validate_document(parsed, load_schema_file(schema_path))
|
||||
|
||||
|
||||
def validate_document(
    document: Document,
    schema: MarkdownSchema | dict[str, Any],
) -> SchemaValidationResult:
    """Validate a parsed document against a JSON Schema.

    Args:
        document: Parsed document; its ``to_dict()`` form is what gets validated.
        schema: Either a loaded ``MarkdownSchema`` or a raw JSON Schema dict.

    Returns:
        The validation result. If the schema itself is malformed, the result is
        invalid and carries the schema's violations; the document is then not
        validated.
    """
    # Only a MarkdownSchema knows where it came from; a raw dict has no path.
    if isinstance(schema, MarkdownSchema):
        raw_schema, origin = schema.schema, schema.source_path
    else:
        raw_schema, origin = schema, None

    precheck = validate_schema(raw_schema)
    if precheck.valid:
        # Sorting by the error's string form gives a deterministic order.
        errors = sorted(
            Draft202012Validator(raw_schema).iter_errors(document.to_dict()),
            key=str,
        )
        found = [
            ValidationViolation(
                path=_format_path(error.path),
                message=error.message,
                schema_path=_format_path(error.schema_path),
            )
            for error in errors
        ]
        is_valid = not found
    else:
        found = precheck.violations
        is_valid = False

    return SchemaValidationResult(
        valid=is_valid,
        violations=found,
        document_path=document.source_path,
        schema_path=origin,
    )
|
||||
|
||||
|
||||
def _format_path(path: Any) -> str:
    """Render an iterable of path parts as a ``$``-rooted, dot-separated string.

    An empty path yields ``"$"``; otherwise each part is converted with
    ``str()`` and joined with dots after a leading ``"$"``.
    """
    segments = [str(part) for part in path]
    if not segments:
        return "$"
    return ".".join(["$", *segments])
|
||||
Reference in New Issue
Block a user