generated from coulomb/repo-seed
Infospace pipeline for the Wealth of Nations example
This commit is contained in:
@@ -9,6 +9,7 @@ from pathlib import Path
|
||||
from .checks import run_collection_checks
|
||||
from .engine import engine_capability_contract, plan_asset_sync, sync_assets
|
||||
from .errors import InfospaceError
|
||||
from .evaluation_io import read_entity_evaluations
|
||||
from .history import (
|
||||
build_viability_report,
|
||||
find_snapshot,
|
||||
@@ -21,7 +22,12 @@ from .inspection import export_mermaid, relationship_summary
|
||||
from .lifecycle import add_artifact, create_infospace, load_infospace
|
||||
from .markdown_adapter import validate_infospace_artifacts
|
||||
from .semantics import list_entities, list_relations
|
||||
from .workflow import load_workflows, plan_workflow, run_workflow
|
||||
from .workflow import (
|
||||
FixtureAssistedGenerationAdapter,
|
||||
load_workflows,
|
||||
plan_workflow,
|
||||
run_workflow,
|
||||
)
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
@@ -111,6 +117,11 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
)
|
||||
workflow_run.add_argument("root")
|
||||
workflow_run.add_argument("workflow_id")
|
||||
workflow_run.add_argument(
|
||||
"--fixture-responses",
|
||||
default="",
|
||||
help="Run assisted stages with deterministic fixture responses",
|
||||
)
|
||||
|
||||
engine = sub.add_parser("engine", help="Inspect and sync engine boundary state")
|
||||
engine_sub = engine.add_subparsers(dest="engine_command", required=True)
|
||||
@@ -222,7 +233,11 @@ def main(argv: list[str] | None = None) -> int:
|
||||
elif args.command == "check":
|
||||
infospace = load_infospace(Path(args.root))
|
||||
report = run_collection_checks(infospace.artifacts)
|
||||
result = record_check_results(infospace.root, report)
|
||||
result = record_check_results(
|
||||
infospace.root,
|
||||
report,
|
||||
artifact_evaluations=_read_output_evaluations(infospace.root),
|
||||
)
|
||||
_write_json(
|
||||
{
|
||||
**result.to_dict(),
|
||||
@@ -253,8 +268,19 @@ def main(argv: list[str] | None = None) -> int:
|
||||
plan_workflow(Path(args.root), args.workflow_id).to_dict()
|
||||
)
|
||||
elif args.workflow_command == "run":
|
||||
adapter = (
|
||||
FixtureAssistedGenerationAdapter.from_file(
|
||||
Path(args.fixture_responses)
|
||||
)
|
||||
if args.fixture_responses
|
||||
else None
|
||||
)
|
||||
_write_json(
|
||||
run_workflow(Path(args.root), args.workflow_id).to_dict()
|
||||
run_workflow(
|
||||
Path(args.root),
|
||||
args.workflow_id,
|
||||
assisted_adapter=adapter,
|
||||
).to_dict()
|
||||
)
|
||||
else:
|
||||
parser.error(f"Unhandled workflow command: {args.workflow_command}")
|
||||
@@ -328,9 +354,14 @@ def _record_checks(root: Path):
|
||||
return record_check_results(
|
||||
infospace.root,
|
||||
run_collection_checks(infospace.artifacts),
|
||||
artifact_evaluations=_read_output_evaluations(infospace.root),
|
||||
)
|
||||
|
||||
|
||||
def _read_output_evaluations(root: Path):
    """Read the entity evaluations stored under ``<root>/output/evaluations``.

    Delegates to ``read_entity_evaluations`` and returns whatever it returns.
    """
    evaluations_dir = root.joinpath("output", "evaluations")
    return read_entity_evaluations(evaluations_dir)
|
||||
|
||||
|
||||
def _relationship_summary_payload(summary) -> dict:
|
||||
return {
|
||||
"node_count": summary.node_count,
|
||||
|
||||
@@ -75,6 +75,17 @@ def read_entity_evaluation(path: str | Path) -> EntityEvaluation:
|
||||
)
|
||||
|
||||
|
||||
def read_entity_evaluations(directory: str | Path) -> list[EntityEvaluation]:
    """Read every ``*.md`` entity evaluation found directly in *directory*.

    Files are visited in sorted path order and each one is parsed with
    ``read_entity_evaluation``. A path that is not an existing directory
    yields an empty list instead of an error.
    """
    folder = Path(directory)
    evaluations: list[EntityEvaluation] = []
    if folder.is_dir():
        for candidate in sorted(folder.glob("*.md")):
            # A directory can also match the "*.md" glob; only real files are parsed.
            if candidate.is_file():
                evaluations.append(read_entity_evaluation(candidate))
    return evaluations
|
||||
|
||||
|
||||
def write_snapshot(snapshot: EvaluationSnapshot, path: str | Path) -> None:
|
||||
target = Path(path)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
127
src/infospace_bench/generation.py
Normal file
127
src/infospace_bench/generation.py
Normal file
@@ -0,0 +1,127 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .errors import InfospaceError
|
||||
from .lifecycle import register_artifact
|
||||
from .semantics import slugify
|
||||
|
||||
|
||||
# Matches one top-level Markdown heading line ("# Title"). Multiline mode
# anchors ^/$ at each line, and the "title" group captures the heading text
# without trailing whitespace.
ENTITY_HEADING_RE = re.compile(r"(?m)^# (?P<title>.+?)\s*$")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EntityBundleItem:
    """One entity section extracted from an entity bundle.

    ``artifact_id`` and ``path`` are derived from ``slug`` rather than stored.
    """

    # Heading text of the entity section.
    title: str
    # Identifier derived from the title; used to build the id and path below.
    slug: str
    # Markdown of the section, starting at its heading.
    markdown: str

    @property
    def artifact_id(self) -> str:
        """Return the artifact identifier, ``entity/<slug>.md``."""
        return "entity/" + self.slug + ".md"

    @property
    def path(self) -> str:
        """Return the artifact path, ``artifacts/entities/<slug>.md``."""
        return "artifacts/entities/" + self.slug + ".md"

    def to_dict(self) -> dict[str, Any]:
        """Return the stored fields plus the derived ``artifact_id`` and ``path``."""
        payload: dict[str, Any] = asdict(self)
        payload["artifact_id"] = self.artifact_id
        payload["path"] = self.path
        return payload
|
||||
|
||||
|
||||
# Opening or closing marker of a fenced code block: up to three spaces of
# indentation followed by at least three backticks or tildes.
_CODE_FENCE_RE = re.compile(r" {0,3}(?P<fence>`{3,}|~{3,})")

# A "## Definition" heading alone on its line.
_DEFINITION_HEADING_RE = re.compile(r"(?m)^## Definition\s*$")


def _mask_fenced_code(markdown: str) -> str:
    """Return *markdown* with closed fenced code blocks replaced by spaces.

    The result has exactly the same length and line breaks as the input, so
    offsets found in it are valid in the original text. Only fences that are
    properly closed are masked; a stray, unclosed fence is left untouched so
    that it cannot hide the rest of the bundle.
    """
    lines = markdown.split("\n")
    opener_index = -1
    opener = ""
    for index, line in enumerate(lines):
        fence = _CODE_FENCE_RE.match(line)
        if fence is None:
            continue
        marker = fence.group("fence")
        if opener_index < 0:
            opener_index, opener = index, marker
            continue
        # A closing fence uses the same character, is at least as long as the
        # opener, and carries nothing but whitespace after the marker.
        closes = (
            marker[0] == opener[0]
            and len(marker) >= len(opener)
            and not line[fence.end():].strip()
        )
        if closes:
            for masked_index in range(opener_index, index + 1):
                lines[masked_index] = " " * len(lines[masked_index])
            opener_index = -1
    return "\n".join(lines)


def parse_entity_bundle(markdown: str) -> list[EntityBundleItem]:
    """Split a Markdown entity bundle into one item per top-level heading.

    Each ``# Title`` line starts a new entity whose section runs until the
    next top-level heading or the end of the text. Text before the first
    heading is not part of any entity. Headings that appear inside a closed
    fenced code block (for example a ``# comment`` in a shell snippet) are
    not treated as entity boundaries or as a ``## Definition`` section.

    Args:
        markdown: The full bundle text.

    Returns:
        The entities in document order.

    Raises:
        InfospaceError: ``invalid_entity_bundle`` when there is no top-level
            heading, a heading slugifies to an empty string, or an entity has
            no ``## Definition`` section; ``duplicate_entity_bundle_item``
            when two headings produce the same slug.
    """
    # Scan a masked copy so code samples cannot introduce fake headings; the
    # masked text is offset-compatible with the original.
    scannable = _mask_fenced_code(markdown)
    matches = list(ENTITY_HEADING_RE.finditer(scannable))
    if not matches:
        raise InfospaceError(
            "invalid_entity_bundle",
            "Entity bundle does not contain any top-level entity headings",
            {"required_heading": "# <Entity Title>"},
        )

    items: list[EntityBundleItem] = []
    seen_slugs: set[str] = set()
    for index, match in enumerate(matches):
        end = matches[index + 1].start() if index + 1 < len(matches) else len(markdown)
        # Slice the ORIGINAL text so code blocks are preserved in the output.
        section = markdown[match.start():end].strip() + "\n"
        title = match.group("title").strip()
        slug = slugify(title)
        if not slug:
            raise InfospaceError(
                "invalid_entity_bundle",
                "Entity bundle contains an empty entity heading",
                {"title": title},
            )
        if slug in seen_slugs:
            raise InfospaceError(
                "duplicate_entity_bundle_item",
                f"Entity bundle contains duplicate entity: {title}",
                {"slug": slug, "title": title},
            )
        if not _DEFINITION_HEADING_RE.search(scannable[match.start():end]):
            raise InfospaceError(
                "invalid_entity_bundle",
                f"Entity bundle item is missing a Definition section: {title}",
                {"slug": slug, "missing_sections": ["definition"]},
            )
        seen_slugs.add(slug)
        items.append(EntityBundleItem(title=title, slug=slug, markdown=section))
    return items
|
||||
|
||||
|
||||
def write_entity_bundle_artifacts(
    root: str | Path,
    markdown: str,
    *,
    workflow_id: str,
    stage_id: str,
    input_artifact_id: str,
    source_bundle_artifact_id: str = "",
    provider: str = "",
    dry_run: bool = False,
) -> list[EntityBundleItem]:
    """Split an entity bundle and write each entity as a registered artifact.

    The bundle is always parsed with ``parse_entity_bundle``, so an invalid
    bundle raises even when ``dry_run`` is set. Unless ``dry_run`` is true,
    each entity's Markdown is written to ``<root>/<item.path>`` and then
    registered through ``register_artifact`` with kind ``"entity"``.

    Args:
        root: Directory the artifact paths are resolved against.
        markdown: The bundle text to split.
        workflow_id: Recorded in each artifact's provenance.
        stage_id: Recorded in each artifact's provenance.
        input_artifact_id: Recorded in provenance and used as the target of
            the ``generated_from`` relationship.
        source_bundle_artifact_id: When non-empty, recorded in provenance and
            added as a ``split_from`` relationship.
        provider: When non-empty, recorded in provenance.
        dry_run: Parse only; write and register nothing.

    Returns:
        The parsed entities, in bundle order.
    """
    entities = parse_entity_bundle(markdown)
    base_dir = Path(root)
    if dry_run:
        return entities

    for entity in entities:
        destination = base_dir / entity.path
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(entity.markdown, encoding="utf-8")

        # Built fresh for every entity so no two registrations share objects.
        provenance = {
            "workflow_id": workflow_id,
            "stage_id": stage_id,
            "input_artifact_id": input_artifact_id,
        }
        links = [{"type": "generated_from", "target": input_artifact_id}]
        if source_bundle_artifact_id:
            provenance["source_bundle_artifact_id"] = source_bundle_artifact_id
            links.append({"type": "split_from", "target": source_bundle_artifact_id})
        if provider:
            provenance["provider"] = provider

        register_artifact(
            base_dir,
            artifact_id=entity.artifact_id,
            path=entity.path,
            kind="entity",
            title=entity.title,
            provenance=provenance,
            relationships=links,
        )
    return entities
|
||||
@@ -9,6 +9,7 @@ from typing import Any, Protocol
|
||||
import yaml
|
||||
|
||||
from .errors import InfospaceError
|
||||
from .generation import write_entity_bundle_artifacts
|
||||
from .lifecycle import load_infospace, register_artifact
|
||||
from .markdown_adapter import render_markdown_template
|
||||
from .models import KnowledgeArtifact
|
||||
@@ -61,7 +62,7 @@ class WorkflowStage:
|
||||
id: str
|
||||
kind: str
|
||||
input: str
|
||||
template: str
|
||||
template: str = ""
|
||||
output: WorkflowOutputSpec | None = None
|
||||
static_macros: dict[str, Any] = field(default_factory=dict)
|
||||
provider_hint: str | None = None
|
||||
@@ -74,7 +75,7 @@ class WorkflowStage:
|
||||
id=str(data["id"]),
|
||||
kind=str(data.get("kind") or "template"),
|
||||
input=str(data.get("input") or ""),
|
||||
template=str(data["template"]),
|
||||
template=str(data.get("template") or ""),
|
||||
output=WorkflowOutputSpec.from_dict(output) if isinstance(output, dict) else None,
|
||||
static_macros=dict(data.get("static_macros") or {}),
|
||||
provider_hint=(
|
||||
@@ -210,6 +211,60 @@ class AssistedGenerationAdapter(Protocol):
|
||||
"""Generate Markdown for an assisted workflow request."""
|
||||
|
||||
|
||||
class FixtureAssistedGenerationAdapter:
    """Assisted-generation adapter that replays canned fixture responses.

    Responses are keyed by ``(stage_id, input_artifact_id)``. The input
    artifact id ``"*"`` is a per-stage wildcard, consulted when no exact key
    is present.
    """

    def __init__(
        self,
        responses: dict[tuple[str, str], AssistedGenerationResult],
    ) -> None:
        """Store the response table used by ``generate``."""
        self.responses = responses

    @classmethod
    def from_file(cls, path: str | Path) -> "FixtureAssistedGenerationAdapter":
        """Build an adapter from a YAML fixture file.

        The file holds a mapping with a ``responses`` list. Each entry needs a
        ``stage_id`` and may give ``input_artifact_id`` (default ``"*"``),
        ``markdown`` or ``markdown_path``, ``provider`` (default
        ``"fixture"``) and ``metadata``. A ``markdown_path`` is resolved
        against the fixture file's directory and takes precedence over
        inline ``markdown``.

        Raises:
            InfospaceError: ``invalid_assisted_fixture`` when the file is not
                a mapping, ``responses`` is not a list, or an entry has no
                ``stage_id``.
        """
        source = Path(path)
        data = yaml.safe_load(source.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise InfospaceError(
                "invalid_assisted_fixture",
                f"Expected mapping in assisted fixture file: {source}",
                {"path": str(source)},
            )
        # "responses:" with no value parses to None; treat it like a missing key.
        raw_responses = data.get("responses") or []
        if not isinstance(raw_responses, list):
            raise InfospaceError(
                "invalid_assisted_fixture",
                f"Expected 'responses' to be a list in assisted fixture file: {source}",
                {"path": str(source)},
            )
        responses: dict[tuple[str, str], AssistedGenerationResult] = {}
        for index, item in enumerate(raw_responses):
            if not isinstance(item, dict):
                # Non-mapping entries are ignored, as before.
                continue
            raw_stage_id = item.get("stage_id")
            if raw_stage_id is None:
                # Report through the package error type instead of a bare KeyError.
                raise InfospaceError(
                    "invalid_assisted_fixture",
                    f"Assisted fixture response is missing stage_id: {source}",
                    {"path": str(source), "index": index},
                )
            stage_id = str(raw_stage_id)
            input_artifact_id = str(item.get("input_artifact_id") or "*")
            markdown = str(item.get("markdown") or "")
            markdown_path = item.get("markdown_path")
            if markdown_path:
                markdown = (source.parent / str(markdown_path)).read_text(
                    encoding="utf-8"
                )
            responses[(stage_id, input_artifact_id)] = AssistedGenerationResult(
                markdown=markdown,
                provider=str(item.get("provider") or "fixture"),
                metadata=dict(item.get("metadata") or {}),
            )
        return cls(responses)

    def generate(
        self,
        request: AssistedGenerationRequest,
    ) -> AssistedGenerationResult:
        """Return the fixture response for *request*.

        The exact ``(stage_id, input_artifact_id)`` entry wins; otherwise the
        stage's ``"*"`` wildcard entry is used.

        Raises:
            InfospaceError: ``missing_assisted_fixture_response`` when neither
                entry exists.
        """
        # Presence, not truthiness, decides whether the wildcard is consulted.
        for key in (
            (request.stage_id, request.input_artifact_id),
            (request.stage_id, "*"),
        ):
            result = self.responses.get(key)
            if result is not None:
                return result
        raise InfospaceError(
            "missing_assisted_fixture_response",
            "No fixture response for assisted workflow request",
            {
                "stage_id": request.stage_id,
                "input_artifact_id": request.input_artifact_id,
            },
        )
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class WorkflowStageRecord:
|
||||
stage_id: str
|
||||
@@ -317,9 +372,9 @@ def _execute_workflow(
|
||||
)
|
||||
for input_record in selected_inputs:
|
||||
data = _template_data(workflow, stage, input_record, stage_outputs)
|
||||
template_text = _read_template(infospace.root, stage.template)
|
||||
rendered = render_markdown_template(template_text, data)
|
||||
if stage.kind == "template":
|
||||
template_text = _read_template(infospace.root, stage.template)
|
||||
rendered = render_markdown_template(template_text, data)
|
||||
output = _resolve_output(
|
||||
workflow,
|
||||
stage,
|
||||
@@ -334,6 +389,7 @@ def _execute_workflow(
|
||||
"content": rendered.markdown,
|
||||
"artifact_id": output.artifact_id,
|
||||
"path": output.path,
|
||||
"provider": "",
|
||||
}
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
@@ -345,6 +401,8 @@ def _execute_workflow(
|
||||
)
|
||||
)
|
||||
elif stage.kind == "assisted":
|
||||
template_text = _read_template(infospace.root, stage.template)
|
||||
rendered = render_markdown_template(template_text, data)
|
||||
request = AssistedGenerationRequest(
|
||||
stage_id=stage.id,
|
||||
workflow_id=workflow.id,
|
||||
@@ -386,6 +444,13 @@ def _execute_workflow(
|
||||
provider=result.provider,
|
||||
)
|
||||
outputs.append(output)
|
||||
stage_outputs[stage.id] = {
|
||||
"content": result.markdown,
|
||||
"artifact_id": output.artifact_id,
|
||||
"path": output.path,
|
||||
"provider": result.provider,
|
||||
"metadata": result.metadata,
|
||||
}
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
stage_id=stage.id,
|
||||
@@ -395,6 +460,77 @@ def _execute_workflow(
|
||||
output_artifact_id=output.artifact_id,
|
||||
)
|
||||
)
|
||||
elif stage.kind == "split_entities":
|
||||
bundle_stage = str(stage.static_macros.get("bundle_stage") or "")
|
||||
if not bundle_stage:
|
||||
raise InfospaceError(
|
||||
"missing_split_bundle_stage",
|
||||
"split_entities stage requires static_macros.bundle_stage",
|
||||
{"workflow_id": workflow.id, "stage_id": stage.id},
|
||||
)
|
||||
bundle_output = stage_outputs.get(bundle_stage)
|
||||
if bundle_output is None:
|
||||
if dry_run:
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
stage_id=stage.id,
|
||||
kind=stage.kind,
|
||||
status="waiting_for_assisted_output",
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
)
|
||||
)
|
||||
continue
|
||||
raise InfospaceError(
|
||||
"missing_split_bundle_output",
|
||||
"split_entities stage could not find the source bundle output",
|
||||
{
|
||||
"workflow_id": workflow.id,
|
||||
"stage_id": stage.id,
|
||||
"bundle_stage": bundle_stage,
|
||||
},
|
||||
)
|
||||
items = write_entity_bundle_artifacts(
|
||||
infospace.root,
|
||||
str(bundle_output.get("content") or ""),
|
||||
workflow_id=workflow.id,
|
||||
stage_id=stage.id,
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
source_bundle_artifact_id=str(
|
||||
bundle_output.get("artifact_id") or ""
|
||||
),
|
||||
provider=str(bundle_output.get("provider") or ""),
|
||||
dry_run=dry_run,
|
||||
)
|
||||
for item in items:
|
||||
outputs.append(
|
||||
WorkflowOutputRecord(
|
||||
stage_id=stage.id,
|
||||
artifact_id=item.artifact_id,
|
||||
path=item.path,
|
||||
kind="entity",
|
||||
title=item.title,
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
written=not dry_run,
|
||||
)
|
||||
)
|
||||
stage_outputs[stage.id] = {
|
||||
"content": "\n".join(item.markdown for item in items),
|
||||
"artifact_id": ",".join(item.artifact_id for item in items),
|
||||
"path": ",".join(item.path for item in items),
|
||||
"provider": str(bundle_output.get("provider") or ""),
|
||||
}
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
stage_id=stage.id,
|
||||
kind=stage.kind,
|
||||
status="planned" if dry_run else "completed",
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
output_artifact_id=",".join(
|
||||
item.artifact_id for item in items
|
||||
),
|
||||
message=f"split {len(items)} entities",
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise InfospaceError(
|
||||
"unsupported_workflow_stage",
|
||||
@@ -525,25 +661,26 @@ def _resolve_output(
|
||||
if not dry_run:
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
target.write_text(markdown, encoding="utf-8")
|
||||
register_artifact(
|
||||
root,
|
||||
artifact_id=artifact_id,
|
||||
path=output_path,
|
||||
kind=stage.output.kind,
|
||||
title=title,
|
||||
provenance={
|
||||
"workflow_id": workflow.id,
|
||||
"stage_id": stage.id,
|
||||
"input_artifact_id": input_record.artifact_id,
|
||||
**({"provider": provider} if provider else {}),
|
||||
},
|
||||
relationships=[
|
||||
{
|
||||
"type": "generated_from",
|
||||
"target": input_record.artifact_id,
|
||||
}
|
||||
],
|
||||
)
|
||||
if stage.output.kind != "evaluation":
|
||||
register_artifact(
|
||||
root,
|
||||
artifact_id=artifact_id,
|
||||
path=output_path,
|
||||
kind=stage.output.kind,
|
||||
title=title,
|
||||
provenance={
|
||||
"workflow_id": workflow.id,
|
||||
"stage_id": stage.id,
|
||||
"input_artifact_id": input_record.artifact_id,
|
||||
**({"provider": provider} if provider else {}),
|
||||
},
|
||||
relationships=[
|
||||
{
|
||||
"type": "generated_from",
|
||||
"target": input_record.artifact_id,
|
||||
}
|
||||
],
|
||||
)
|
||||
return WorkflowOutputRecord(
|
||||
stage_id=stage.id,
|
||||
artifact_id=artifact_id,
|
||||
|
||||
Reference in New Issue
Block a user