generated from coulomb/repo-seed
128 lines
3.8 KiB
Python
128 lines
3.8 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
from dataclasses import asdict, dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from .errors import InfospaceError
|
|
from .lifecycle import register_artifact
|
|
from .semantics import slugify
|
|
|
|
|
|
# Matches every top-level Markdown heading line ("# Title") in multiline mode.
# Deeper headings such as "## ..." do not match because a space must directly
# follow the single "#". The "title" group excludes trailing whitespace.
ENTITY_HEADING_RE = re.compile(r"(?m)^# (?P<title>.+?)\s*$")
|
|
|
|
|
|
@dataclass(frozen=True)
class EntityBundleItem:
    """One entity section of a Markdown bundle, with its derived locations."""

    # Heading text of the entity.
    title: str
    # Identifier used to build the artifact id and the file path.
    slug: str
    # Markdown text of the entity section.
    markdown: str

    @property
    def artifact_id(self) -> str:
        """Return the artifact identifier derived from the slug."""
        return f"entity/{self.slug}.md"

    @property
    def path(self) -> str:
        """Return the relative file path derived from the slug."""
        return f"artifacts/entities/{self.slug}.md"

    def to_dict(self) -> dict[str, Any]:
        """Return the fields plus ``artifact_id`` and ``path`` as a dict."""
        payload: dict[str, Any] = asdict(self)
        # Derived values come after the stored fields, in this order.
        payload["artifact_id"] = self.artifact_id
        payload["path"] = self.path
        return payload
|
|
|
|
|
|
def parse_entity_bundle(markdown: str) -> list[EntityBundleItem]:
    """Split a Markdown bundle into one item per top-level ``# `` heading.

    Each section runs from its heading to the start of the next heading (or
    to the end of the text), is stripped of surrounding whitespace and is
    terminated with a single newline. Text before the first heading is not
    part of any section.

    Args:
        markdown: The full bundle text.

    Returns:
        The parsed items, in the order their headings appear.

    Raises:
        InfospaceError: With code ``invalid_entity_bundle`` when there is no
            top-level heading, when a heading yields an empty slug, or when
            a section has no ``## Definition`` line; with code
            ``duplicate_entity_bundle_item`` when two headings yield the
            same slug.
    """
    # NOTE(review): ENTITY_HEADING_RE is purely line-based, so a "# ..." line
    # inside a fenced code block is also treated as a heading - confirm that
    # bundles never contain such lines.
    headings = list(ENTITY_HEADING_RE.finditer(markdown))
    if not headings:
        raise InfospaceError(
            "invalid_entity_bundle",
            "Entity bundle does not contain any top-level entity headings",
            {"required_heading": "# <Entity Title>"},
        )

    definition_heading = re.compile(r"(?m)^## Definition\s*$")

    # A section stops where the next heading starts; the last one runs to EOF.
    starts = [heading.start() for heading in headings]
    stops = starts[1:] + [len(markdown)]

    bundle: list[EntityBundleItem] = []
    used_slugs: set[str] = set()
    for heading, begin, stop in zip(headings, starts, stops):
        body = markdown[begin:stop].strip() + "\n"
        title = heading.group("title").strip()
        slug = slugify(title)

        if not slug:
            raise InfospaceError(
                "invalid_entity_bundle",
                "Entity bundle contains an empty entity heading",
                {"title": title},
            )
        if slug in used_slugs:
            raise InfospaceError(
                "duplicate_entity_bundle_item",
                f"Entity bundle contains duplicate entity: {title}",
                {"slug": slug, "title": title},
            )
        if definition_heading.search(body) is None:
            raise InfospaceError(
                "invalid_entity_bundle",
                f"Entity bundle item is missing a Definition section: {title}",
                {"slug": slug, "missing_sections": ["definition"]},
            )

        used_slugs.add(slug)
        bundle.append(EntityBundleItem(title=title, slug=slug, markdown=body))

    return bundle
|
|
|
|
|
|
def write_entity_bundle_artifacts(
    root: str | Path,
    markdown: str,
    *,
    workflow_id: str,
    stage_id: str,
    input_artifact_id: str,
    source_bundle_artifact_id: str = "",
    provider: str = "",
    dry_run: bool = False,
) -> list[EntityBundleItem]:
    """Parse an entity bundle, write one file per entity and register each.

    Args:
        root: Base directory that the items' relative paths are joined onto.
        markdown: The bundle text handed to ``parse_entity_bundle``.
        workflow_id: Recorded in each artifact's provenance.
        stage_id: Recorded in each artifact's provenance.
        input_artifact_id: Recorded in provenance and used as the target of
            the ``generated_from`` relationship.
        source_bundle_artifact_id: When non-empty, recorded in provenance and
            used as the target of an extra ``split_from`` relationship.
        provider: When non-empty, recorded in provenance.
        dry_run: When true, the bundle is parsed but nothing is written or
            registered.

    Returns:
        The parsed items, whether or not ``dry_run`` is set.

    Raises:
        InfospaceError: Propagated from ``parse_entity_bundle`` when the
            bundle is invalid.
    """
    parsed = parse_entity_bundle(markdown)
    base_dir = Path(root)
    if dry_run:
        # Validation already happened in the parse; skip all side effects.
        return parsed

    for entry in parsed:
        destination = base_dir / entry.path
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(entry.markdown, encoding="utf-8")

        # Fresh containers per item, so every registration gets its own.
        links = [{"type": "generated_from", "target": input_artifact_id}]
        provenance = {
            "workflow_id": workflow_id,
            "stage_id": stage_id,
            "input_artifact_id": input_artifact_id,
        }
        if source_bundle_artifact_id:
            links.append({"type": "split_from", "target": source_bundle_artifact_id})
            provenance["source_bundle_artifact_id"] = source_bundle_artifact_id
        if provider:
            provenance["provider"] = provider

        register_artifact(
            base_dir,
            artifact_id=entry.artifact_id,
            path=entry.path,
            kind="entity",
            title=entry.title,
            provenance=provenance,
            relationships=links,
        )

    return parsed
|