"""Load document contracts from Markdown files.""" from __future__ import annotations from copy import deepcopy from pathlib import Path from typing import Any import yaml from markitect_tool.contract.model import DocumentContract from markitect_tool.core import parse_markdown class ContractLoaderError(ValueError): """Raised when a contract file cannot be loaded.""" class ContractNotFoundError(ContractLoaderError): """Raised when no contract definition can be found in a Markdown file.""" class InvalidContractFormatError(ContractLoaderError): """Raised when the contract definition is not valid YAML.""" def load_contract_file(path: str | Path) -> DocumentContract: """Load a Markdown-native document contract file.""" file_path = Path(path) text = file_path.read_text(encoding="utf-8") return load_contract_text(text, source_path=str(file_path)) def load_contract_text(text: str, source_path: str | None = None) -> DocumentContract: """Load a document contract from Markdown text.""" document = parse_markdown(text, source_path=source_path) frontmatter_contract = document.frontmatter.get("contract") if frontmatter_contract is not None and not isinstance(frontmatter_contract, dict): raise InvalidContractFormatError("Frontmatter `contract` must be a mapping") block_data, block_line = _extract_contract_block(document.tokens, source_path) merged = _merge_contracts(frontmatter_contract or {}, block_data or {}) metadata = { key: value for key, value in document.frontmatter.items() if key != "contract" } if not merged and _looks_like_contract(metadata): merged = deepcopy(metadata) if not merged: raise ContractNotFoundError( "No contract definition found. Add a fenced ```yaml contract block." ) return DocumentContract.from_mapping( merged, metadata=metadata, source_path=source_path, source_line=block_line, ) def _extract_contract_block( tokens: list[dict[str, Any]], source_path: str | None ) -> tuple[dict[str, Any] | None, int | None]: yaml_candidates: list[tuple[dict[str, Any], int | None, bool]] = [] for token in tokens: if token.get("type") != "fence": continue info = str(token.get("info", "")).strip().lower() if not _is_yaml_info(info): continue line = _token_line(token) raw_yaml = token.get("content", "") try: data = yaml.safe_load(raw_yaml) if raw_yaml.strip() else {} except yaml.YAMLError as exc: raise InvalidContractFormatError( f"Invalid YAML contract block in {source_path or ''}: {exc}" ) from exc if data is None: data = {} if not isinstance(data, dict): raise InvalidContractFormatError("Contract YAML block must be a mapping") yaml_candidates.append((data, line, "contract" in info.split())) for data, line, explicit in yaml_candidates: if explicit: return data, line for data, line, _explicit in yaml_candidates: if _looks_like_contract(data): return data, line return None, None def _is_yaml_info(info: str) -> bool: parts = info.split() return "yaml" in parts or "yml" in parts def _token_line(token: dict[str, Any]) -> int | None: token_map = token.get("map") if not token_map: return None return int(token_map[0]) + 1 def _looks_like_contract(data: dict[str, Any]) -> bool: return any( key in data for key in { "document", "document_type", "document-type", "sections", "fields", "metrics", "metric_bands", "assertions", "forms", "rules", "rubrics", } ) def _merge_contracts( frontmatter_contract: dict[str, Any], block_contract: dict[str, Any] ) -> dict[str, Any]: merged = deepcopy(frontmatter_contract) for key, value in block_contract.items(): if ( isinstance(value, dict) and isinstance(merged.get(key), dict) ): nested = deepcopy(merged[key]) nested.update(value) merged[key] = nested else: merged[key] = value return merged