generated from coulomb/repo-seed
144 lines
4.3 KiB
Python
144 lines
4.3 KiB
Python
"""Load document contracts from Markdown files."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from copy import deepcopy
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import yaml
|
|
|
|
from markitect_tool.contract.model import DocumentContract
|
|
from markitect_tool.core import parse_markdown
|
|
|
|
|
|
class ContractLoaderError(ValueError):
    """Base error for any failure to load a document contract.

    Derives from ``ValueError``, so handlers written for ``ValueError``
    receive it as well.
    """
|
|
|
|
|
|
class ContractNotFoundError(ContractLoaderError):
    """Signal that a Markdown file holds no contract definition at all."""
|
|
|
|
|
|
class InvalidContractFormatError(ContractLoaderError):
    """Signal that a contract definition is malformed.

    Used both for YAML that fails to parse and for contract data that is
    not a mapping.
    """
|
|
|
|
|
|
def load_contract_file(path: str | Path) -> DocumentContract:
    """Read a Markdown contract file from disk and build its contract.

    The file is decoded as UTF-8 and passed to ``load_contract_text``; the
    path, converted to a string, is recorded as the contract's source path.
    """
    contract_path = Path(path)
    return load_contract_text(
        contract_path.read_text(encoding="utf-8"),
        source_path=str(contract_path),
    )
|
|
|
|
|
|
def load_contract_text(text: str, source_path: str | None = None) -> DocumentContract:
    """Build a document contract from Markdown source text.

    The contract is assembled from two places: the frontmatter ``contract``
    entry and a fenced YAML block in the body. Both are merged, with block
    values taking precedence over frontmatter values. When neither yields
    anything, the remaining frontmatter keys are used as the contract,
    provided they look like one.

    Args:
        text: Markdown source to parse.
        source_path: Optional origin of the text, forwarded to the parser
            and to the resulting contract.

    Returns:
        The contract built by ``DocumentContract.from_mapping``. Every
        frontmatter key except ``contract`` is passed along as metadata, and
        the line of the chosen YAML block (if any) as ``source_line``.

    Raises:
        InvalidContractFormatError: If the frontmatter ``contract`` entry is
            present but is not a mapping.
        ContractNotFoundError: If no contract definition can be found.
    """
    document = parse_markdown(text, source_path=source_path)
    frontmatter = document.frontmatter

    declared = frontmatter.get("contract")
    if not (declared is None or isinstance(declared, dict)):
        raise InvalidContractFormatError("Frontmatter `contract` must be a mapping")

    block_data, block_line = _extract_contract_block(document.tokens, source_path)
    merged = _merge_contracts(declared or {}, block_data or {})

    # Everything in the frontmatter other than the contract itself is metadata.
    metadata = {}
    for key, value in frontmatter.items():
        if key != "contract":
            metadata[key] = value

    if not merged:
        # Last resort: the frontmatter itself may be the contract.
        if not _looks_like_contract(metadata):
            raise ContractNotFoundError(
                "No contract definition found. Add a fenced ```yaml contract block."
            )
        merged = deepcopy(metadata)

    return DocumentContract.from_mapping(
        merged,
        metadata=metadata,
        source_path=source_path,
        source_line=block_line,
    )
|
|
|
|
|
|
def _extract_contract_block(
    tokens: list[dict[str, Any]], source_path: str | None
) -> tuple[dict[str, Any] | None, int | None]:
    """Find the fenced YAML block that holds the contract definition.

    Only tokens whose ``type`` is ``"fence"`` and whose info string names
    YAML are considered. A fence whose info string also contains the word
    ``contract`` is an explicit contract block and is preferred; otherwise
    the first YAML fence whose data looks like a contract is used.

    Args:
        tokens: Parsed Markdown tokens as dictionaries; the ``type``,
            ``info``, ``content`` and ``map`` keys are read.
        source_path: Origin of the document, used only in error messages.

    Returns:
        ``(data, line)`` for the selected block, where ``line`` is whatever
        ``_token_line`` reports for the fence, or ``(None, None)`` when no
        YAML fence qualifies.

    Raises:
        InvalidContractFormatError: If a YAML fence fails to parse, or if an
            explicit ``contract`` fence does not contain a mapping.
    """
    yaml_candidates: list[tuple[dict[str, Any], int | None, bool]] = []
    for token in tokens:
        if token.get("type") != "fence":
            continue
        info = str(token.get("info", "")).strip().lower()
        if not _is_yaml_info(info):
            continue
        explicit = "contract" in info.split()
        line = _token_line(token)
        raw_yaml = token.get("content", "")
        try:
            # An empty or whitespace-only fence counts as an empty mapping.
            data = yaml.safe_load(raw_yaml) if raw_yaml.strip() else {}
        except yaml.YAMLError as exc:
            raise InvalidContractFormatError(
                f"Invalid YAML contract block in {source_path or '<string>'}: {exc}"
            ) from exc
        if data is None:
            data = {}
        if not isinstance(data, dict):
            if not explicit:
                # An untagged YAML fence holding a list or scalar is ordinary
                # example content and can never be picked as the contract, so
                # it must not abort loading of an otherwise valid document.
                continue
            raise InvalidContractFormatError("Contract YAML block must be a mapping")
        yaml_candidates.append((data, line, explicit))

    # First choice: a fence explicitly tagged as the contract.
    for data, line, explicit in yaml_candidates:
        if explicit:
            return data, line
    # Fallback: the first YAML fence whose keys resemble a contract.
    for data, line, _explicit in yaml_candidates:
        if _looks_like_contract(data):
            return data, line
    return None, None
|
|
|
|
|
|
def _is_yaml_info(info: str) -> bool:
|
|
parts = info.split()
|
|
return "yaml" in parts or "yml" in parts
|
|
|
|
|
|
def _token_line(token: dict[str, Any]) -> int | None:
|
|
token_map = token.get("map")
|
|
if not token_map:
|
|
return None
|
|
return int(token_map[0]) + 1
|
|
|
|
|
|
def _looks_like_contract(data: dict[str, Any]) -> bool:
|
|
return any(
|
|
key in data
|
|
for key in {
|
|
"document",
|
|
"document_type",
|
|
"document-type",
|
|
"sections",
|
|
"fields",
|
|
"metrics",
|
|
"metric_bands",
|
|
"assertions",
|
|
"forms",
|
|
"rules",
|
|
"rubrics",
|
|
}
|
|
)
|
|
|
|
|
|
def _merge_contracts(
|
|
frontmatter_contract: dict[str, Any], block_contract: dict[str, Any]
|
|
) -> dict[str, Any]:
|
|
merged = deepcopy(frontmatter_contract)
|
|
for key, value in block_contract.items():
|
|
if (
|
|
isinstance(value, dict)
|
|
and isinstance(merged.get(key), dict)
|
|
):
|
|
nested = deepcopy(merged[key])
|
|
nested.update(value)
|
|
merged[key] = nested
|
|
else:
|
|
merged[key] = value
|
|
return merged
|