Files
markitect-tool/src/markitect_tool/contract/loader.py

144 lines
4.3 KiB
Python

"""Load document contracts from Markdown files."""
from __future__ import annotations
from copy import deepcopy
from pathlib import Path
from typing import Any
import yaml
from markitect_tool.contract.model import DocumentContract
from markitect_tool.core import parse_markdown
class ContractLoaderError(ValueError):
    """Base error for any failure while loading a document contract.

    Derives from ``ValueError`` so callers that already guard against bad
    input values keep working without knowing about this hierarchy.
    """
class ContractNotFoundError(ContractLoaderError):
    """Signals that a Markdown source holds no usable contract definition."""
class InvalidContractFormatError(ContractLoaderError):
    """Signals a malformed contract definition.

    Used both when the YAML cannot be parsed and when a contract value is
    not a mapping.
    """
def load_contract_file(path: str | Path) -> DocumentContract:
    """Read a Markdown contract file from disk and build its contract.

    Args:
        path: Location of the Markdown file; it is decoded as UTF-8.

    Returns:
        The contract produced by ``load_contract_text`` for the file's
        contents, with the file path recorded as its source.
    """
    source = Path(path)
    markdown = source.read_text(encoding="utf-8")
    return load_contract_text(markdown, source_path=str(source))
def load_contract_text(text: str, source_path: str | None = None) -> DocumentContract:
    """Build a document contract from Markdown source text.

    The definition is assembled from up to three places:

    * the ``contract`` key of the frontmatter,
    * a fenced YAML contract block, whose top-level entries are merged
      over the frontmatter ``contract`` mapping,
    * as a fallback when both of the above are empty, the remaining
      frontmatter itself, provided it contains a recognised contract key.

    Args:
        text: Markdown source to parse.
        source_path: Optional origin of the text, forwarded to the parser,
            to error messages and to the resulting contract.

    Returns:
        The contract created by ``DocumentContract.from_mapping``. Its
        metadata is the frontmatter without the ``contract`` key and its
        source line is that of the fenced block, if one was used.

    Raises:
        InvalidContractFormatError: If the frontmatter ``contract`` value
            is present but not a mapping, or a YAML block is malformed.
        ContractNotFoundError: If no contract definition can be found.
    """
    document = parse_markdown(text, source_path=source_path)
    frontmatter = document.frontmatter

    declared = frontmatter.get("contract")
    if not (declared is None or isinstance(declared, dict)):
        raise InvalidContractFormatError("Frontmatter `contract` must be a mapping")

    fenced, fenced_line = _extract_contract_block(document.tokens, source_path)
    definition = _merge_contracts(
        declared if declared else {},
        fenced if fenced else {},
    )

    # Everything in the frontmatter except the contract itself is metadata.
    metadata = {}
    for name, entry in frontmatter.items():
        if name != "contract":
            metadata[name] = entry

    if not definition:
        # Last resort: the frontmatter may *be* the contract.
        if not _looks_like_contract(metadata):
            raise ContractNotFoundError(
                "No contract definition found. Add a fenced ```yaml contract block."
            )
        definition = deepcopy(metadata)

    return DocumentContract.from_mapping(
        definition,
        metadata=metadata,
        source_path=source_path,
        source_line=fenced_line,
    )
def _extract_contract_block(
    tokens: list[dict[str, Any]], source_path: str | None
) -> tuple[dict[str, Any] | None, int | None]:
    """Locate the contract definition among a document's fenced YAML blocks.

    Selection order:

    1. The first YAML fence whose info string also contains the word
       ``contract`` (e.g. ``yaml contract``) is used. Only that fence is
       parsed, so unrelated YAML examples elsewhere in the document can
       neither be mistaken for the contract nor abort loading because
       they happen to be invalid or not a mapping.
    2. If no fence is tagged that way, every YAML fence is parsed and the
       first one whose top-level keys look like a contract is used.

    Args:
        tokens: Parsed Markdown tokens as dictionaries. Only tokens whose
            ``type`` is ``"fence"`` and whose ``info`` names YAML are
            considered.
        source_path: Origin of the document, used in error messages.

    Returns:
        ``(data, line)`` for the selected block, where ``line`` comes from
        ``_token_line`` and may be ``None``; ``(None, None)`` when no fence
        qualifies.

    Raises:
        InvalidContractFormatError: If the explicitly tagged fence, or (when
            none is tagged) any YAML fence, is invalid YAML or does not
            contain a mapping.
    """
    untagged: list[dict[str, Any]] = []
    for token in tokens:
        if token.get("type") != "fence":
            continue
        info = str(token.get("info", "")).strip().lower()
        if not _is_yaml_info(info):
            continue
        if "contract" in info.split():
            # An explicit tag is authoritative: stop here without touching
            # any other YAML fence in the document.
            return _load_fence_mapping(token, source_path), _token_line(token)
        untagged.append(token)

    # No explicit tag: we cannot tell which fence was meant to be the
    # contract, so all of them are validated before the heuristic runs.
    candidates = [
        (_load_fence_mapping(token, source_path), _token_line(token))
        for token in untagged
    ]
    for data, line in candidates:
        if _looks_like_contract(data):
            return data, line
    return None, None


def _load_fence_mapping(
    token: dict[str, Any], source_path: str | None
) -> dict[str, Any]:
    """Parse the YAML body of one fence token and return it as a mapping.

    A blank or missing body, and YAML that loads as ``None``, yield an
    empty mapping.

    Raises:
        InvalidContractFormatError: If the body is not valid YAML or its
            top-level value is not a mapping.
    """
    raw_yaml = token.get("content") or ""
    try:
        data = yaml.safe_load(raw_yaml) if raw_yaml.strip() else {}
    except yaml.YAMLError as exc:
        raise InvalidContractFormatError(
            f"Invalid YAML contract block in {source_path or '<string>'}: {exc}"
        ) from exc
    if data is None:
        data = {}
    if not isinstance(data, dict):
        raise InvalidContractFormatError("Contract YAML block must be a mapping")
    return data
def _is_yaml_info(info: str) -> bool:
parts = info.split()
return "yaml" in parts or "yml" in parts
def _token_line(token: dict[str, Any]) -> int | None:
token_map = token.get("map")
if not token_map:
return None
return int(token_map[0]) + 1
def _looks_like_contract(data: dict[str, Any]) -> bool:
return any(
key in data
for key in {
"document",
"document_type",
"document-type",
"sections",
"fields",
"metrics",
"metric_bands",
"assertions",
"forms",
"rules",
"rubrics",
}
)
def _merge_contracts(
frontmatter_contract: dict[str, Any], block_contract: dict[str, Any]
) -> dict[str, Any]:
merged = deepcopy(frontmatter_contract)
for key, value in block_contract.items():
if (
isinstance(value, dict)
and isinstance(merged.get(key), dict)
):
nested = deepcopy(merged[key])
nested.update(value)
merged[key] = nested
else:
merged[key] = value
return merged