from __future__ import annotations import re from dataclasses import asdict, dataclass from pathlib import Path from typing import Any from .errors import InfospaceError from .lifecycle import register_artifact from .semantics import slugify ENTITY_HEADING_RE = re.compile(r"(?m)^# (?P.+?)\s*$") @dataclass(frozen=True) class EntityBundleItem: title: str slug: str markdown: str @property def artifact_id(self) -> str: return f"entity/{self.slug}.md" @property def path(self) -> str: return f"artifacts/entities/{self.slug}.md" def to_dict(self) -> dict[str, Any]: return asdict(self) | { "artifact_id": self.artifact_id, "path": self.path, } def parse_entity_bundle(markdown: str) -> list[EntityBundleItem]: matches = list(ENTITY_HEADING_RE.finditer(markdown)) if not matches: raise InfospaceError( "invalid_entity_bundle", "Entity bundle does not contain any top-level entity headings", {"required_heading": "# <Entity Title>"}, ) items: list[EntityBundleItem] = [] seen_slugs: set[str] = set() for index, match in enumerate(matches): end = matches[index + 1].start() if index + 1 < len(matches) else len(markdown) section = markdown[match.start() : end].strip() + "\n" title = match.group("title").strip() slug = slugify(title) if not slug: raise InfospaceError( "invalid_entity_bundle", "Entity bundle contains an empty entity heading", {"title": title}, ) if slug in seen_slugs: raise InfospaceError( "duplicate_entity_bundle_item", f"Entity bundle contains duplicate entity: {title}", {"slug": slug, "title": title}, ) if not re.search(r"(?m)^## Definition\s*$", section): raise InfospaceError( "invalid_entity_bundle", f"Entity bundle item is missing a Definition section: {title}", {"slug": slug, "missing_sections": ["definition"]}, ) seen_slugs.add(slug) items.append(EntityBundleItem(title=title, slug=slug, markdown=section)) return items def write_entity_bundle_artifacts( root: str | Path, markdown: str, *, workflow_id: str, stage_id: str, input_artifact_id: str, source_bundle_artifact_id: str = "", provider: str = "", dry_run: bool = False, ) -> list[EntityBundleItem]: items = parse_entity_bundle(markdown) root_path = Path(root) for item in items: if dry_run: continue target = root_path / item.path target.parent.mkdir(parents=True, exist_ok=True) target.write_text(item.markdown, encoding="utf-8") relationships = [ { "type": "generated_from", "target": input_artifact_id, } ] if source_bundle_artifact_id: relationships.append( { "type": "split_from", "target": source_bundle_artifact_id, } ) register_artifact( root_path, artifact_id=item.artifact_id, path=item.path, kind="entity", title=item.title, provenance={ "workflow_id": workflow_id, "stage_id": stage_id, "input_artifact_id": input_artifact_id, **( {"source_bundle_artifact_id": source_bundle_artifact_id} if source_bundle_artifact_id else {} ), **({"provider": provider} if provider else {}), }, relationships=relationships, ) return items