markitect-main/markitect/infospace/classification_io.py

"""
Read/write utilities for entity classification files (L2).

Classification files use YAML frontmatter (machine-readable) plus a
markdown body (human-readable), matching the convention used by evaluation
files.
"""

from __future__ import annotations

from pathlib import Path
from typing import List

import yaml

from .classification import EntityClassification


_FRONTMATTER_SEP = "---"


def write_entity_classification(c: EntityClassification, path: Path) -> None:
    """Serialise one entity classification to *path*.

    The file starts with a YAML frontmatter block (the output of
    ``c.to_dict()``) fenced by ``---`` lines, followed by a markdown body
    that repeats the main fields for human readers. Parent directories of
    *path* are created when missing and the file is written as UTF-8.

    Args:
        c: The classification to write.
        path: Destination file; overwritten if it already exists.
    """
    frontmatter_yaml = yaml.safe_dump(
        c.to_dict(), default_flow_style=False, sort_keys=False
    ).rstrip()
    heading = c.entity_slug.replace("_", " ").replace("-", " ").title()

    # Sections that are always present, in output order.
    out: List[str] = [
        _FRONTMATTER_SEP,
        frontmatter_yaml,
        _FRONTMATTER_SEP,
        "",
        f"# Classification: {heading}",
        "",
        "## Entity Type",
        "",
        c.entity_type,
        "",
        "## VSM System",
        "",
        c.vsm_system,
        "",
    ]

    # Rationale sections are emitted only when they have content.
    optional_sections = (
        ("## Type Rationale", c.type_rationale),
        ("## VSM Rationale", c.vsm_rationale),
    )
    for section_heading, section_text in optional_sections:
        if section_text:
            out.extend([section_heading, "", section_text, ""])

    # The links section is keyed on the mechanism text: without it, subject
    # and object are not written to the body at all.
    if c.links_mechanism:
        out.extend(["## Links", ""])
        if c.links_subject:
            out.append(f"**Subject:** {c.links_subject}")
        if c.links_object:
            out.append(f"**Object:** {c.links_object}")
        out.extend(["", c.links_mechanism, ""])

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(out), encoding="utf-8")


def read_entity_classification(path: Path) -> EntityClassification:
    """Read a classification file (YAML frontmatter + markdown body).

    Only the frontmatter is parsed; the markdown body after the closing
    separator is ignored. The frontmatter is the text between the first two
    lines that consist solely of ``---`` (trailing whitespace tolerated).
    Any text before the opening separator is skipped.

    Args:
        path: The classification file to read (decoded as UTF-8).

    Returns:
        The classification built from the frontmatter mapping via
        ``EntityClassification.from_dict``.

    Raises:
        ValueError: If the file has no frontmatter delimited by two ``---``
            lines, or the frontmatter does not parse to a YAML mapping.
    """
    text = path.read_text(encoding="utf-8")
    lines = text.split("\n")

    # Match separators as whole lines. A plain substring split on "---\n"
    # would also fire on any line that merely ends in "---" (for example a
    # folded YAML scalar) and would miss a closing separator that is the
    # last line of a file without a trailing newline.
    opening = None
    closing = None
    for idx, line in enumerate(lines):
        if line.rstrip() != _FRONTMATTER_SEP:
            continue
        if opening is None:
            opening = idx
        else:
            closing = idx
            break

    if opening is None or closing is None:
        raise ValueError(f"No YAML frontmatter found in {path}")

    fm = yaml.safe_load("\n".join(lines[opening + 1:closing]))

    # safe_load returns None for an empty document and may return a scalar
    # or a list; fail with a clear message instead of an obscure error
    # from inside from_dict.
    if not isinstance(fm, dict):
        raise ValueError(f"YAML frontmatter in {path} is not a mapping")

    return EntityClassification.from_dict(fm)


def read_classifications_directory(directory: Path) -> List[EntityClassification]:
    """Read all classification files from a directory.

    Every ``*.md`` file directly inside *directory* (no recursion) is read
    in sorted path order. Reading is best-effort: a file that cannot be
    read or parsed is skipped, and a warning naming the file and the error
    is logged so the omission is not silent.

    Args:
        directory: Directory to scan for ``*.md`` classification files.

    Returns:
        The classifications that were read successfully, in sorted path
        order. Empty when no file matches or none can be read.
    """
    import logging  # function-scope import: the module does not import logging

    log = logging.getLogger(__name__)
    results: List[EntityClassification] = []
    for p in sorted(directory.glob("*.md")):
        try:
            results.append(read_entity_classification(p))
        except Exception as exc:
            # Deliberately broad: one bad file must not abort the whole
            # scan, but the failure is reported rather than swallowed.
            log.warning("Skipping unreadable classification file %s: %s", p, exc)
    return results