infospace pipeline for wealth of nations example

This commit is contained in:
2026-05-14 18:04:38 +02:00
parent 8804461ca3
commit a729a7643e
26 changed files with 1124 additions and 32 deletions

View File

@@ -9,6 +9,7 @@ from typing import Any, Protocol
import yaml
from .errors import InfospaceError
from .generation import write_entity_bundle_artifacts
from .lifecycle import load_infospace, register_artifact
from .markdown_adapter import render_markdown_template
from .models import KnowledgeArtifact
@@ -61,7 +62,7 @@ class WorkflowStage:
id: str
kind: str
input: str
template: str
template: str = ""
output: WorkflowOutputSpec | None = None
static_macros: dict[str, Any] = field(default_factory=dict)
provider_hint: str | None = None
@@ -74,7 +75,7 @@ class WorkflowStage:
id=str(data["id"]),
kind=str(data.get("kind") or "template"),
input=str(data.get("input") or ""),
template=str(data["template"]),
template=str(data.get("template") or ""),
output=WorkflowOutputSpec.from_dict(output) if isinstance(output, dict) else None,
static_macros=dict(data.get("static_macros") or {}),
provider_hint=(
@@ -210,6 +211,60 @@ class AssistedGenerationAdapter(Protocol):
"""Generate Markdown for an assisted workflow request."""
class FixtureAssistedGenerationAdapter:
    """Assisted-generation adapter that replays canned fixture responses.

    Responses are stored under ``(stage_id, input_artifact_id)`` keys. A key
    whose second element is ``"*"`` acts as the fallback for any input
    artifact of that stage that has no exact entry.
    """

    def __init__(
        self,
        responses: dict[tuple[str, str], AssistedGenerationResult],
    ) -> None:
        """Keep the given response table for later lookups."""
        self.responses = responses

    @classmethod
    def from_file(cls, path: str | Path) -> "FixtureAssistedGenerationAdapter":
        """Build an adapter from a YAML fixture file.

        The file must hold a mapping. Its ``responses`` entry is iterated and
        every mapping item contributes one response, read from these keys:

        * ``stage_id`` (required)
        * ``input_artifact_id`` (falls back to ``"*"``)
        * ``markdown`` (falls back to ``""``)
        * ``markdown_path`` - when set, the file at this path (resolved
          against the fixture file's directory) replaces ``markdown``
        * ``provider`` (falls back to ``"fixture"``)
        * ``metadata`` (falls back to an empty dict)

        Items that are not mappings are skipped. When two items share the
        same key, the later one wins.

        Raises:
            InfospaceError: ``invalid_assisted_fixture`` when the top-level
                YAML value is not a mapping.
            KeyError: when a response item has no ``stage_id``.
        """
        source = Path(path)
        data = yaml.safe_load(source.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise InfospaceError(
                "invalid_assisted_fixture",
                f"Expected mapping in assisted fixture file: {source}",
                {"path": str(source)},
            )

        responses: dict[tuple[str, str], AssistedGenerationResult] = {}
        # A bare ``responses:`` key parses to None, and dict.get's default
        # only applies to a *missing* key, so coalesce explicitly instead of
        # crashing while iterating None.
        for item in data.get("responses") or []:
            if not isinstance(item, dict):
                continue
            stage_id = str(item["stage_id"])
            input_artifact_id = str(item.get("input_artifact_id") or "*")
            markdown = str(item.get("markdown") or "")
            markdown_path = item.get("markdown_path")
            if markdown_path:
                # External markdown takes precedence over the inline value.
                markdown = (source.parent / str(markdown_path)).read_text(
                    encoding="utf-8"
                )
            responses[(stage_id, input_artifact_id)] = AssistedGenerationResult(
                markdown=markdown,
                provider=str(item.get("provider") or "fixture"),
                metadata=dict(item.get("metadata") or {}),
            )
        return cls(responses)

    def generate(
        self,
        request: AssistedGenerationRequest,
    ) -> AssistedGenerationResult:
        """Return the canned response matching ``request``.

        The exact ``(stage_id, input_artifact_id)`` entry is preferred; the
        stage's ``"*"`` entry is used only when no exact entry exists.

        Raises:
            InfospaceError: ``missing_assisted_fixture_response`` when
                neither entry is present.
        """
        result = self.responses.get((request.stage_id, request.input_artifact_id))
        # Test for absence, not truthiness, so an exact match is never
        # silently replaced by the wildcard entry.
        if result is None:
            result = self.responses.get((request.stage_id, "*"))
        if result is None:
            raise InfospaceError(
                "missing_assisted_fixture_response",
                "No fixture response for assisted workflow request",
                {
                    "stage_id": request.stage_id,
                    "input_artifact_id": request.input_artifact_id,
                },
            )
        return result
@dataclass(frozen=True)
class WorkflowStageRecord:
stage_id: str
@@ -317,9 +372,9 @@ def _execute_workflow(
)
for input_record in selected_inputs:
data = _template_data(workflow, stage, input_record, stage_outputs)
template_text = _read_template(infospace.root, stage.template)
rendered = render_markdown_template(template_text, data)
if stage.kind == "template":
template_text = _read_template(infospace.root, stage.template)
rendered = render_markdown_template(template_text, data)
output = _resolve_output(
workflow,
stage,
@@ -334,6 +389,7 @@ def _execute_workflow(
"content": rendered.markdown,
"artifact_id": output.artifact_id,
"path": output.path,
"provider": "",
}
stages.append(
WorkflowStageRecord(
@@ -345,6 +401,8 @@ def _execute_workflow(
)
)
elif stage.kind == "assisted":
template_text = _read_template(infospace.root, stage.template)
rendered = render_markdown_template(template_text, data)
request = AssistedGenerationRequest(
stage_id=stage.id,
workflow_id=workflow.id,
@@ -386,6 +444,13 @@ def _execute_workflow(
provider=result.provider,
)
outputs.append(output)
stage_outputs[stage.id] = {
"content": result.markdown,
"artifact_id": output.artifact_id,
"path": output.path,
"provider": result.provider,
"metadata": result.metadata,
}
stages.append(
WorkflowStageRecord(
stage_id=stage.id,
@@ -395,6 +460,77 @@ def _execute_workflow(
output_artifact_id=output.artifact_id,
)
)
elif stage.kind == "split_entities":
bundle_stage = str(stage.static_macros.get("bundle_stage") or "")
if not bundle_stage:
raise InfospaceError(
"missing_split_bundle_stage",
"split_entities stage requires static_macros.bundle_stage",
{"workflow_id": workflow.id, "stage_id": stage.id},
)
bundle_output = stage_outputs.get(bundle_stage)
if bundle_output is None:
if dry_run:
stages.append(
WorkflowStageRecord(
stage_id=stage.id,
kind=stage.kind,
status="waiting_for_assisted_output",
input_artifact_id=input_record.artifact_id,
)
)
continue
raise InfospaceError(
"missing_split_bundle_output",
"split_entities stage could not find the source bundle output",
{
"workflow_id": workflow.id,
"stage_id": stage.id,
"bundle_stage": bundle_stage,
},
)
items = write_entity_bundle_artifacts(
infospace.root,
str(bundle_output.get("content") or ""),
workflow_id=workflow.id,
stage_id=stage.id,
input_artifact_id=input_record.artifact_id,
source_bundle_artifact_id=str(
bundle_output.get("artifact_id") or ""
),
provider=str(bundle_output.get("provider") or ""),
dry_run=dry_run,
)
for item in items:
outputs.append(
WorkflowOutputRecord(
stage_id=stage.id,
artifact_id=item.artifact_id,
path=item.path,
kind="entity",
title=item.title,
input_artifact_id=input_record.artifact_id,
written=not dry_run,
)
)
stage_outputs[stage.id] = {
"content": "\n".join(item.markdown for item in items),
"artifact_id": ",".join(item.artifact_id for item in items),
"path": ",".join(item.path for item in items),
"provider": str(bundle_output.get("provider") or ""),
}
stages.append(
WorkflowStageRecord(
stage_id=stage.id,
kind=stage.kind,
status="planned" if dry_run else "completed",
input_artifact_id=input_record.artifact_id,
output_artifact_id=",".join(
item.artifact_id for item in items
),
message=f"split {len(items)} entities",
)
)
else:
raise InfospaceError(
"unsupported_workflow_stage",
@@ -525,25 +661,26 @@ def _resolve_output(
if not dry_run:
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text(markdown, encoding="utf-8")
register_artifact(
root,
artifact_id=artifact_id,
path=output_path,
kind=stage.output.kind,
title=title,
provenance={
"workflow_id": workflow.id,
"stage_id": stage.id,
"input_artifact_id": input_record.artifact_id,
**({"provider": provider} if provider else {}),
},
relationships=[
{
"type": "generated_from",
"target": input_record.artifact_id,
}
],
)
if stage.output.kind != "evaluation":
register_artifact(
root,
artifact_id=artifact_id,
path=output_path,
kind=stage.output.kind,
title=title,
provenance={
"workflow_id": workflow.id,
"stage_id": stage.id,
"input_artifact_id": input_record.artifact_id,
**({"provider": provider} if provider else {}),
},
relationships=[
{
"type": "generated_from",
"target": input_record.artifact_id,
}
],
)
return WorkflowOutputRecord(
stage_id=stage.id,
artifact_id=artifact_id,