Files
infospace-bench/src/infospace_bench/markdown_adapter.py
2026-05-14 14:53:16 +02:00

170 lines
5.1 KiB
Python

from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from markitect_tool import Heading, Section, parse_markdown_file
from markitect_tool.contract import check_markdown_file
from .errors import InfospaceError
from .lifecycle import load_infospace
@dataclass(frozen=True)
class MarkdownDiagnostic:
    """Immutable record of a single diagnostic, convertible to a plain dict.

    ``severity``, ``code`` and ``message`` are always set. The remaining
    fields are optional and are omitted from :meth:`to_dict` when falsy.
    """

    severity: str
    code: str
    message: str
    source: dict[str, Any] | None = None
    contract: dict[str, Any] | None = None
    rule_id: str | None = None
    guidance: str | None = None
    details: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_markitect(cls, diagnostic: Any) -> "MarkdownDiagnostic":
        """Build an instance from an object that exposes ``to_dict()``.

        The three mandatory fields are coerced with ``str``; a missing or
        falsy value becomes the empty string. ``details`` is shallow-copied
        into a new dict, and a missing or falsy value becomes ``{}``.
        """
        raw = diagnostic.to_dict()

        def as_text(key: str) -> str:
            # Falsy values (None, "", 0, ...) collapse to the empty string.
            return str(raw.get(key) or "")

        return cls(
            severity=as_text("severity"),
            code=as_text("code"),
            message=as_text("message"),
            source=raw.get("source"),
            contract=raw.get("contract"),
            rule_id=raw.get("rule_id"),
            guidance=raw.get("guidance"),
            details=dict(raw.get("details") or {}),
        )

    def to_dict(self) -> dict[str, Any]:
        """Return a dict with the mandatory keys plus every truthy optional one."""
        payload: dict[str, Any] = {
            "severity": self.severity,
            "code": self.code,
            "message": self.message,
        }
        # Tuple order fixes the key order of the optional entries.
        optional = (
            ("source", self.source),
            ("contract", self.contract),
            ("rule_id", self.rule_id),
            ("guidance", self.guidance),
            ("details", self.details),
        )
        payload.update({key: value for key, value in optional if value})
        return payload
@dataclass(frozen=True)
class ParsedMarkdownArtifact:
    """Immutable bundle of a Markdown file's path, frontmatter, headings and sections."""

    path: Path
    frontmatter: dict[str, Any]
    headings: list[Heading]
    sections: list[Section]

    def to_dict(self) -> dict[str, Any]:
        """Return a dict view: path as a string, headings/sections via their ``to_dict()``."""
        payload: dict[str, Any] = {
            "path": str(self.path),
            "frontmatter": self.frontmatter,
        }
        payload["headings"] = [item.to_dict() for item in self.headings]
        payload["sections"] = [item.to_dict() for item in self.sections]
        return payload
@dataclass(frozen=True)
class ArtifactValidationResult:
    """Immutable outcome of validating one artifact against one contract file."""

    artifact_id: str
    path: str
    contract_path: str
    valid: bool
    diagnostics: list[MarkdownDiagnostic] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a dict view; each diagnostic is expanded via its ``to_dict()``."""
        payload: dict[str, Any] = {
            "artifact_id": self.artifact_id,
            "path": self.path,
            "contract_path": self.contract_path,
            "valid": self.valid,
        }
        payload["diagnostics"] = [entry.to_dict() for entry in self.diagnostics]
        payload["metrics"] = self.metrics
        return payload
def parse_markdown_artifact(path: str | Path) -> ParsedMarkdownArtifact:
    """Parse the Markdown file at *path* and wrap the result.

    *path* is converted to a ``Path``, handed to ``parse_markdown_file``, and
    the returned document's ``frontmatter``, ``headings`` and ``sections``
    are copied by reference into a ``ParsedMarkdownArtifact``.
    """
    location = Path(path)
    doc = parse_markdown_file(location)
    frontmatter = doc.frontmatter
    headings = doc.headings
    sections = doc.sections
    return ParsedMarkdownArtifact(
        path=location,
        frontmatter=frontmatter,
        headings=headings,
        sections=sections,
    )
def extract_section_text(
    parsed: ParsedMarkdownArtifact,
    heading: str,
) -> str:
    """Return the joined block text of the first section titled *heading*.

    Headings are compared after ``_normalize_heading`` is applied to both
    sides. Blocks with falsy ``text`` are skipped; the rest are joined with
    newlines and the result is stripped. Returns ``""`` when no section
    matches.
    """
    wanted = _normalize_heading(heading)
    # The generator is lazy, so scanning stops at the first matching section.
    hit = next(
        (
            candidate
            for candidate in parsed.sections
            if _normalize_heading(candidate.heading.text) == wanted
        ),
        None,
    )
    if hit is None:
        return ""
    pieces = [block.text for block in hit.blocks if block.text]
    return "\n".join(pieces).strip()
def validate_markdown_artifact(
    artifact_id: str,
    path: str | Path,
    contract_path: str | Path,
) -> ArtifactValidationResult:
    """Check one Markdown file against one contract and package the outcome.

    Both paths are converted to ``Path`` and passed to ``check_markdown_file``.
    The returned ``valid`` flag and ``metrics`` are carried over unchanged,
    and each diagnostic is converted with ``MarkdownDiagnostic.from_markitect``.
    """
    target = Path(path)
    contract_file = Path(contract_path)
    outcome = check_markdown_file(target, contract_file)
    return ArtifactValidationResult(
        artifact_id=artifact_id,
        path=str(target),
        contract_path=str(contract_file),
        valid=outcome.valid,
        diagnostics=list(
            map(MarkdownDiagnostic.from_markitect, outcome.diagnostics)
        ),
        metrics=outcome.metrics,
    )
def validate_infospace_artifacts(root: str | Path) -> list[ArtifactValidationResult]:
    """Validate every artifact of the infospace at *root* that has a contract.

    The contract for an artifact is looked up in ``config.schemas`` under the
    artifact's ``kind``, falling back to the ``"artifact"`` entry. Artifacts
    with no contract reference are skipped.

    Raises:
        InfospaceError: with code ``"missing_contract"`` when a configured
            contract path is not an existing file.
    """
    infospace = load_infospace(root)
    collected: list[ArtifactValidationResult] = []
    for artifact in infospace.artifacts:
        schemas = infospace.config.schemas
        contract_ref = schemas.get(artifact.kind)
        if not contract_ref:
            # No kind-specific contract: fall back to the generic entry.
            contract_ref = schemas.get("artifact")
        if not contract_ref:
            continue

        artifact_path = infospace.root / artifact.path
        contract_path = infospace.root / contract_ref
        if contract_path.is_file():
            collected.append(
                validate_markdown_artifact(artifact.id, artifact_path, contract_path)
            )
            continue

        raise InfospaceError(
            "missing_contract",
            f"Configured contract does not exist: {contract_path}",
            {
                "artifact_id": artifact.id,
                "contract_path": str(contract_path),
            },
        )
    return collected
def _normalize_heading(value: str) -> str:
    """Lower-case *value* and collapse every whitespace run to a single space."""
    # Argument-less split() already drops leading and trailing whitespace.
    words = value.lower().split()
    return " ".join(words)