generated from coulomb/repo-seed
Infospace pipeline for the Wealth of Nations example
This commit is contained in:
@@ -9,6 +9,7 @@ from typing import Any, Protocol
|
||||
import yaml
|
||||
|
||||
from .errors import InfospaceError
|
||||
from .generation import write_entity_bundle_artifacts
|
||||
from .lifecycle import load_infospace, register_artifact
|
||||
from .markdown_adapter import render_markdown_template
|
||||
from .models import KnowledgeArtifact
|
||||
@@ -61,7 +62,7 @@ class WorkflowStage:
|
||||
id: str
|
||||
kind: str
|
||||
input: str
|
||||
template: str
|
||||
template: str = ""
|
||||
output: WorkflowOutputSpec | None = None
|
||||
static_macros: dict[str, Any] = field(default_factory=dict)
|
||||
provider_hint: str | None = None
|
||||
@@ -74,7 +75,7 @@ class WorkflowStage:
|
||||
id=str(data["id"]),
|
||||
kind=str(data.get("kind") or "template"),
|
||||
input=str(data.get("input") or ""),
|
||||
template=str(data["template"]),
|
||||
template=str(data.get("template") or ""),
|
||||
output=WorkflowOutputSpec.from_dict(output) if isinstance(output, dict) else None,
|
||||
static_macros=dict(data.get("static_macros") or {}),
|
||||
provider_hint=(
|
||||
@@ -210,6 +211,60 @@ class AssistedGenerationAdapter(Protocol):
|
||||
"""Generate Markdown for an assisted workflow request."""
|
||||
|
||||
|
||||
class FixtureAssistedGenerationAdapter:
    """Assisted-generation adapter that replays canned fixture responses.

    Responses are keyed by ``(stage_id, input_artifact_id)``. The artifact id
    ``"*"`` is a per-stage wildcard that is consulted when no exact key exists.
    """

    def __init__(
        self,
        responses: dict[tuple[str, str], AssistedGenerationResult],
    ) -> None:
        """Store the response table consulted by :meth:`generate`."""
        self.responses = responses

    @classmethod
    def from_file(cls, path: str | Path) -> "FixtureAssistedGenerationAdapter":
        """Build an adapter from a YAML fixture file.

        The file must contain a mapping; its ``responses`` entry is a list of
        mappings with a required ``stage_id`` and optional
        ``input_artifact_id`` (defaults to ``"*"``), ``markdown``,
        ``markdown_path`` (resolved relative to the fixture file and, when
        set, overriding ``markdown``), ``provider`` (defaults to
        ``"fixture"``) and ``metadata``.

        Raises:
            InfospaceError: ``invalid_assisted_fixture`` when the top-level
                YAML value is not a mapping.
            KeyError: when a response entry has no ``stage_id``.
        """
        source = Path(path)
        data = yaml.safe_load(source.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise InfospaceError(
                "invalid_assisted_fixture",
                f"Expected mapping in assisted fixture file: {source}",
                {"path": str(source)},
            )
        responses: dict[tuple[str, str], AssistedGenerationResult] = {}
        # ``or []`` also covers an explicit ``responses: null`` in the YAML,
        # which ``data.get("responses", [])`` would hand back as ``None``.
        for item in data.get("responses") or []:
            if not isinstance(item, dict):
                # Non-mapping entries are ignored rather than rejected.
                continue
            stage_id = str(item["stage_id"])
            input_artifact_id = str(item.get("input_artifact_id") or "*")
            markdown = str(item.get("markdown") or "")
            markdown_path = item.get("markdown_path")
            if markdown_path:
                # An external markdown file takes precedence over inline text.
                markdown = (source.parent / str(markdown_path)).read_text(
                    encoding="utf-8"
                )
            responses[(stage_id, input_artifact_id)] = AssistedGenerationResult(
                markdown=markdown,
                provider=str(item.get("provider") or "fixture"),
                metadata=dict(item.get("metadata") or {}),
            )
        return cls(responses)

    def generate(
        self,
        request: AssistedGenerationRequest,
    ) -> AssistedGenerationResult:
        """Return the canned response for ``request``.

        An exact ``(stage_id, input_artifact_id)`` match wins; otherwise the
        stage's ``"*"`` wildcard entry is used.

        Raises:
            InfospaceError: ``missing_assisted_fixture_response`` when neither
                an exact nor a wildcard entry exists.
        """
        # Compare against None explicitly so a stored value is never skipped
        # merely because it happens to be falsy.
        exact = self.responses.get((request.stage_id, request.input_artifact_id))
        if exact is not None:
            return exact
        fallback = self.responses.get((request.stage_id, "*"))
        if fallback is None:
            raise InfospaceError(
                "missing_assisted_fixture_response",
                "No fixture response for assisted workflow request",
                {
                    "stage_id": request.stage_id,
                    "input_artifact_id": request.input_artifact_id,
                },
            )
        return fallback
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class WorkflowStageRecord:
|
||||
stage_id: str
|
||||
@@ -317,9 +372,9 @@ def _execute_workflow(
|
||||
)
|
||||
for input_record in selected_inputs:
|
||||
data = _template_data(workflow, stage, input_record, stage_outputs)
|
||||
template_text = _read_template(infospace.root, stage.template)
|
||||
rendered = render_markdown_template(template_text, data)
|
||||
if stage.kind == "template":
|
||||
template_text = _read_template(infospace.root, stage.template)
|
||||
rendered = render_markdown_template(template_text, data)
|
||||
output = _resolve_output(
|
||||
workflow,
|
||||
stage,
|
||||
@@ -334,6 +389,7 @@ def _execute_workflow(
|
||||
"content": rendered.markdown,
|
||||
"artifact_id": output.artifact_id,
|
||||
"path": output.path,
|
||||
"provider": "",
|
||||
}
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
@@ -345,6 +401,8 @@ def _execute_workflow(
|
||||
)
|
||||
)
|
||||
elif stage.kind == "assisted":
|
||||
template_text = _read_template(infospace.root, stage.template)
|
||||
rendered = render_markdown_template(template_text, data)
|
||||
request = AssistedGenerationRequest(
|
||||
stage_id=stage.id,
|
||||
workflow_id=workflow.id,
|
||||
@@ -386,6 +444,13 @@ def _execute_workflow(
|
||||
provider=result.provider,
|
||||
)
|
||||
outputs.append(output)
|
||||
stage_outputs[stage.id] = {
|
||||
"content": result.markdown,
|
||||
"artifact_id": output.artifact_id,
|
||||
"path": output.path,
|
||||
"provider": result.provider,
|
||||
"metadata": result.metadata,
|
||||
}
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
stage_id=stage.id,
|
||||
@@ -395,6 +460,77 @@ def _execute_workflow(
|
||||
output_artifact_id=output.artifact_id,
|
||||
)
|
||||
)
|
||||
elif stage.kind == "split_entities":
|
||||
bundle_stage = str(stage.static_macros.get("bundle_stage") or "")
|
||||
if not bundle_stage:
|
||||
raise InfospaceError(
|
||||
"missing_split_bundle_stage",
|
||||
"split_entities stage requires static_macros.bundle_stage",
|
||||
{"workflow_id": workflow.id, "stage_id": stage.id},
|
||||
)
|
||||
bundle_output = stage_outputs.get(bundle_stage)
|
||||
if bundle_output is None:
|
||||
if dry_run:
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
stage_id=stage.id,
|
||||
kind=stage.kind,
|
||||
status="waiting_for_assisted_output",
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
)
|
||||
)
|
||||
continue
|
||||
raise InfospaceError(
|
||||
"missing_split_bundle_output",
|
||||
"split_entities stage could not find the source bundle output",
|
||||
{
|
||||
"workflow_id": workflow.id,
|
||||
"stage_id": stage.id,
|
||||
"bundle_stage": bundle_stage,
|
||||
},
|
||||
)
|
||||
items = write_entity_bundle_artifacts(
|
||||
infospace.root,
|
||||
str(bundle_output.get("content") or ""),
|
||||
workflow_id=workflow.id,
|
||||
stage_id=stage.id,
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
source_bundle_artifact_id=str(
|
||||
bundle_output.get("artifact_id") or ""
|
||||
),
|
||||
provider=str(bundle_output.get("provider") or ""),
|
||||
dry_run=dry_run,
|
||||
)
|
||||
for item in items:
|
||||
outputs.append(
|
||||
WorkflowOutputRecord(
|
||||
stage_id=stage.id,
|
||||
artifact_id=item.artifact_id,
|
||||
path=item.path,
|
||||
kind="entity",
|
||||
title=item.title,
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
written=not dry_run,
|
||||
)
|
||||
)
|
||||
stage_outputs[stage.id] = {
|
||||
"content": "\n".join(item.markdown for item in items),
|
||||
"artifact_id": ",".join(item.artifact_id for item in items),
|
||||
"path": ",".join(item.path for item in items),
|
||||
"provider": str(bundle_output.get("provider") or ""),
|
||||
}
|
||||
stages.append(
|
||||
WorkflowStageRecord(
|
||||
stage_id=stage.id,
|
||||
kind=stage.kind,
|
||||
status="planned" if dry_run else "completed",
|
||||
input_artifact_id=input_record.artifact_id,
|
||||
output_artifact_id=",".join(
|
||||
item.artifact_id for item in items
|
||||
),
|
||||
message=f"split {len(items)} entities",
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise InfospaceError(
|
||||
"unsupported_workflow_stage",
|
||||
@@ -525,25 +661,26 @@ def _resolve_output(
|
||||
if not dry_run:
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
target.write_text(markdown, encoding="utf-8")
|
||||
register_artifact(
|
||||
root,
|
||||
artifact_id=artifact_id,
|
||||
path=output_path,
|
||||
kind=stage.output.kind,
|
||||
title=title,
|
||||
provenance={
|
||||
"workflow_id": workflow.id,
|
||||
"stage_id": stage.id,
|
||||
"input_artifact_id": input_record.artifact_id,
|
||||
**({"provider": provider} if provider else {}),
|
||||
},
|
||||
relationships=[
|
||||
{
|
||||
"type": "generated_from",
|
||||
"target": input_record.artifact_id,
|
||||
}
|
||||
],
|
||||
)
|
||||
if stage.output.kind != "evaluation":
|
||||
register_artifact(
|
||||
root,
|
||||
artifact_id=artifact_id,
|
||||
path=output_path,
|
||||
kind=stage.output.kind,
|
||||
title=title,
|
||||
provenance={
|
||||
"workflow_id": workflow.id,
|
||||
"stage_id": stage.id,
|
||||
"input_artifact_id": input_record.artifact_id,
|
||||
**({"provider": provider} if provider else {}),
|
||||
},
|
||||
relationships=[
|
||||
{
|
||||
"type": "generated_from",
|
||||
"target": input_record.artifact_id,
|
||||
}
|
||||
],
|
||||
)
|
||||
return WorkflowOutputRecord(
|
||||
stage_id=stage.id,
|
||||
artifact_id=artifact_id,
|
||||
|
||||
Reference in New Issue
Block a user