infospace pipeline for wealth of nations example

This commit is contained in:
2026-05-14 18:04:38 +02:00
parent 8804461ca3
commit a729a7643e
26 changed files with 1124 additions and 32 deletions

View File

@@ -9,6 +9,7 @@ from pathlib import Path
from .checks import run_collection_checks
from .engine import engine_capability_contract, plan_asset_sync, sync_assets
from .errors import InfospaceError
from .evaluation_io import read_entity_evaluations
from .history import (
build_viability_report,
find_snapshot,
@@ -21,7 +22,12 @@ from .inspection import export_mermaid, relationship_summary
from .lifecycle import add_artifact, create_infospace, load_infospace
from .markdown_adapter import validate_infospace_artifacts
from .semantics import list_entities, list_relations
from .workflow import load_workflows, plan_workflow, run_workflow
from .workflow import (
FixtureAssistedGenerationAdapter,
load_workflows,
plan_workflow,
run_workflow,
)
def build_parser() -> argparse.ArgumentParser:
@@ -111,6 +117,11 @@ def build_parser() -> argparse.ArgumentParser:
)
workflow_run.add_argument("root")
workflow_run.add_argument("workflow_id")
workflow_run.add_argument(
"--fixture-responses",
default="",
help="Run assisted stages with deterministic fixture responses",
)
engine = sub.add_parser("engine", help="Inspect and sync engine boundary state")
engine_sub = engine.add_subparsers(dest="engine_command", required=True)
@@ -222,7 +233,11 @@ def main(argv: list[str] | None = None) -> int:
elif args.command == "check":
infospace = load_infospace(Path(args.root))
report = run_collection_checks(infospace.artifacts)
result = record_check_results(infospace.root, report)
result = record_check_results(
infospace.root,
report,
artifact_evaluations=_read_output_evaluations(infospace.root),
)
_write_json(
{
**result.to_dict(),
@@ -253,8 +268,19 @@ def main(argv: list[str] | None = None) -> int:
plan_workflow(Path(args.root), args.workflow_id).to_dict()
)
elif args.workflow_command == "run":
adapter = (
FixtureAssistedGenerationAdapter.from_file(
Path(args.fixture_responses)
)
if args.fixture_responses
else None
)
_write_json(
run_workflow(Path(args.root), args.workflow_id).to_dict()
run_workflow(
Path(args.root),
args.workflow_id,
assisted_adapter=adapter,
).to_dict()
)
else:
parser.error(f"Unhandled workflow command: {args.workflow_command}")
@@ -328,9 +354,14 @@ def _record_checks(root: Path):
return record_check_results(
infospace.root,
run_collection_checks(infospace.artifacts),
artifact_evaluations=_read_output_evaluations(infospace.root),
)
def _read_output_evaluations(root: Path):
    """Load the entity evaluations stored under ``<root>/output/evaluations``.

    Delegates to ``read_entity_evaluations`` and returns its result unchanged.
    """
    evaluations_dir = root / "output" / "evaluations"
    return read_entity_evaluations(evaluations_dir)
def _relationship_summary_payload(summary) -> dict:
return {
"node_count": summary.node_count,

View File

@@ -75,6 +75,17 @@ def read_entity_evaluation(path: str | Path) -> EntityEvaluation:
)
def read_entity_evaluations(directory: str | Path) -> list[EntityEvaluation]:
    """Read every ``*.md`` evaluation file found directly in *directory*.

    Args:
        directory: Folder expected to hold the evaluation Markdown files.

    Returns:
        One entry per regular ``*.md`` file, parsed with
        ``read_entity_evaluation`` in sorted path order. An empty list is
        returned when *directory* is not an existing directory.
    """
    source = Path(directory)
    if not source.is_dir():
        return []

    evaluations = []
    for candidate in sorted(source.glob("*.md")):
        # A directory can also match the "*.md" pattern; only files are parsed.
        if candidate.is_file():
            evaluations.append(read_entity_evaluation(candidate))
    return evaluations
def write_snapshot(snapshot: EvaluationSnapshot, path: str | Path) -> None:
target = Path(path)
target.parent.mkdir(parents=True, exist_ok=True)

View File

@@ -0,0 +1,127 @@
from __future__ import annotations
import re
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any
from .errors import InfospaceError
from .lifecycle import register_artifact
from .semantics import slugify
# Matches a level-one Markdown heading line ("# Title") and captures the
# heading text as the "title" group. The inline (?m) flag makes ^ and $
# anchor at every line, so each such line in a bundle is found.
ENTITY_HEADING_RE = re.compile(r"(?m)^# (?P<title>.+?)\s*$")
@dataclass(frozen=True)
class EntityBundleItem:
    """One entity section cut out of a multi-entity Markdown bundle."""

    # Heading text of the entity section.
    title: str
    # Slug used to build the artifact id and the file path.
    slug: str
    # Markdown of the section, starting at its heading line.
    markdown: str

    @property
    def artifact_id(self) -> str:
        """Artifact identifier derived from the slug."""
        return f"entity/{self.slug}.md"

    @property
    def path(self) -> str:
        """Relative file path derived from the slug."""
        return f"artifacts/entities/{self.slug}.md"

    def to_dict(self) -> dict[str, Any]:
        """Return the fields plus the derived ``artifact_id`` and ``path``."""
        payload: dict[str, Any] = asdict(self)
        payload["artifact_id"] = self.artifact_id
        payload["path"] = self.path
        return payload
def parse_entity_bundle(markdown: str) -> list[EntityBundleItem]:
    """Split a Markdown bundle into one item per top-level heading.

    Every line matched by ``ENTITY_HEADING_RE`` starts a new section that
    runs up to the next matched heading (or the end of the text). Text before
    the first heading is not part of any item.

    Args:
        markdown: Full Markdown text of the bundle.

    Returns:
        The parsed items in document order. Each item's ``markdown`` is the
        stripped section followed by a single newline.

    Raises:
        InfospaceError: ``invalid_entity_bundle`` when no heading is found,
            when a heading title slugifies to an empty value, or when a
            section has no ``## Definition`` line;
            ``duplicate_entity_bundle_item`` when two headings produce the
            same slug.
    """
    headings = list(ENTITY_HEADING_RE.finditer(markdown))
    if not headings:
        raise InfospaceError(
            "invalid_entity_bundle",
            "Entity bundle does not contain any top-level entity headings",
            {"required_heading": "# <Entity Title>"},
        )

    # A section ends where the following heading starts; the last section
    # extends to the end of the document.
    section_ends = [heading.start() for heading in headings[1:]]
    section_ends.append(len(markdown))

    parsed: list[EntityBundleItem] = []
    used_slugs: set[str] = set()
    for heading, section_end in zip(headings, section_ends):
        body = markdown[heading.start() : section_end].strip() + "\n"
        title = heading.group("title").strip()
        slug = slugify(title)

        if not slug:
            raise InfospaceError(
                "invalid_entity_bundle",
                "Entity bundle contains an empty entity heading",
                {"title": title},
            )
        if slug in used_slugs:
            raise InfospaceError(
                "duplicate_entity_bundle_item",
                f"Entity bundle contains duplicate entity: {title}",
                {"slug": slug, "title": title},
            )
        if re.search(r"(?m)^## Definition\s*$", body) is None:
            raise InfospaceError(
                "invalid_entity_bundle",
                f"Entity bundle item is missing a Definition section: {title}",
                {"slug": slug, "missing_sections": ["definition"]},
            )

        used_slugs.add(slug)
        parsed.append(EntityBundleItem(title=title, slug=slug, markdown=body))
    return parsed
def write_entity_bundle_artifacts(
    root: str | Path,
    markdown: str,
    *,
    workflow_id: str,
    stage_id: str,
    input_artifact_id: str,
    source_bundle_artifact_id: str = "",
    provider: str = "",
    dry_run: bool = False,
) -> list[EntityBundleItem]:
    """Parse an entity bundle and persist each entity as its own artifact.

    Args:
        root: Directory that the items' relative paths are resolved against.
        markdown: Bundle text handed to ``parse_entity_bundle``.
        workflow_id: Recorded in each artifact's provenance.
        stage_id: Recorded in each artifact's provenance.
        input_artifact_id: Recorded in provenance and used as the target of
            the ``generated_from`` relationship.
        source_bundle_artifact_id: When non-empty, recorded in provenance and
            used as the target of an extra ``split_from`` relationship.
        provider: When non-empty, recorded in provenance.
        dry_run: When true, the bundle is parsed but nothing is written or
            registered.

    Returns:
        The parsed items, whether or not they were written.

    Raises:
        InfospaceError: Propagated from ``parse_entity_bundle`` for an
            invalid bundle.
    """
    items = parse_entity_bundle(markdown)
    root_path = Path(root)
    if dry_run:
        return items

    for item in items:
        destination = root_path / item.path
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(item.markdown, encoding="utf-8")

        # Fresh containers are built for every item so that no mutable
        # object is shared between register_artifact calls.
        provenance = {
            "workflow_id": workflow_id,
            "stage_id": stage_id,
            "input_artifact_id": input_artifact_id,
        }
        relationships = [{"type": "generated_from", "target": input_artifact_id}]
        if source_bundle_artifact_id:
            provenance["source_bundle_artifact_id"] = source_bundle_artifact_id
            relationships.append(
                {"type": "split_from", "target": source_bundle_artifact_id}
            )
        if provider:
            provenance["provider"] = provider

        register_artifact(
            root_path,
            artifact_id=item.artifact_id,
            path=item.path,
            kind="entity",
            title=item.title,
            provenance=provenance,
            relationships=relationships,
        )
    return items

View File

@@ -9,6 +9,7 @@ from typing import Any, Protocol
import yaml
from .errors import InfospaceError
from .generation import write_entity_bundle_artifacts
from .lifecycle import load_infospace, register_artifact
from .markdown_adapter import render_markdown_template
from .models import KnowledgeArtifact
@@ -61,7 +62,7 @@ class WorkflowStage:
id: str
kind: str
input: str
template: str
template: str = ""
output: WorkflowOutputSpec | None = None
static_macros: dict[str, Any] = field(default_factory=dict)
provider_hint: str | None = None
@@ -74,7 +75,7 @@ class WorkflowStage:
id=str(data["id"]),
kind=str(data.get("kind") or "template"),
input=str(data.get("input") or ""),
template=str(data["template"]),
template=str(data.get("template") or ""),
output=WorkflowOutputSpec.from_dict(output) if isinstance(output, dict) else None,
static_macros=dict(data.get("static_macros") or {}),
provider_hint=(
@@ -210,6 +211,60 @@ class AssistedGenerationAdapter(Protocol):
"""Generate Markdown for an assisted workflow request."""
class FixtureAssistedGenerationAdapter:
    """Assisted-generation adapter that replays canned fixture responses.

    Responses are keyed by ``(stage_id, input_artifact_id)``. The input
    artifact id ``"*"`` is a per-stage wildcard that is used when no exact
    entry exists for a request.
    """

    def __init__(
        self,
        responses: dict[tuple[str, str], AssistedGenerationResult],
    ) -> None:
        self.responses = responses

    @classmethod
    def from_file(cls, path: str | Path) -> "FixtureAssistedGenerationAdapter":
        """Build an adapter from a YAML fixture file.

        The file must hold a mapping with an optional ``responses`` list.
        Each list entry is a mapping with a required ``stage_id`` and optional
        ``input_artifact_id`` (defaults to ``"*"``), ``markdown``,
        ``markdown_path`` (resolved relative to the fixture file and taking
        precedence over ``markdown``), ``provider`` (defaults to
        ``"fixture"``) and ``metadata``. Entries that are not mappings are
        skipped.

        Raises:
            InfospaceError: ``invalid_assisted_fixture`` when the document is
                not a mapping or a response entry has no ``stage_id``.
            OSError: When the fixture file or a referenced ``markdown_path``
                cannot be read.
        """
        source = Path(path)
        data = yaml.safe_load(source.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise InfospaceError(
                "invalid_assisted_fixture",
                f"Expected mapping in assisted fixture file: {source}",
                {"path": str(source)},
            )
        responses: dict[tuple[str, str], AssistedGenerationResult] = {}
        # A "responses:" key with no value loads as None, which the
        # dict.get default would not replace; treat it as an empty list.
        for item in data.get("responses") or []:
            if not isinstance(item, dict):
                continue
            if item.get("stage_id") is None:
                # Report a malformed entry with the same structured error as
                # a malformed document instead of leaking a bare KeyError.
                raise InfospaceError(
                    "invalid_assisted_fixture",
                    f"Assisted fixture response is missing stage_id: {source}",
                    {"path": str(source)},
                )
            stage_id = str(item["stage_id"])
            input_artifact_id = str(item.get("input_artifact_id") or "*")
            markdown = str(item.get("markdown") or "")
            markdown_path = item.get("markdown_path")
            if markdown_path:
                markdown = (source.parent / str(markdown_path)).read_text(
                    encoding="utf-8"
                )
            responses[(stage_id, input_artifact_id)] = AssistedGenerationResult(
                markdown=markdown,
                provider=str(item.get("provider") or "fixture"),
                metadata=dict(item.get("metadata") or {}),
            )
        return cls(responses)

    def generate(
        self,
        request: AssistedGenerationRequest,
    ) -> AssistedGenerationResult:
        """Return the fixture response registered for *request*.

        An exact ``(stage_id, input_artifact_id)`` entry wins over the
        stage's ``"*"`` wildcard entry.

        Raises:
            InfospaceError: ``missing_assisted_fixture_response`` when neither
                an exact nor a wildcard entry exists.
        """
        # Compare against None rather than relying on truthiness so that an
        # exact entry is never skipped in favour of the wildcard.
        exact = self.responses.get((request.stage_id, request.input_artifact_id))
        if exact is not None:
            return exact
        wildcard = self.responses.get((request.stage_id, "*"))
        if wildcard is not None:
            return wildcard
        raise InfospaceError(
            "missing_assisted_fixture_response",
            "No fixture response for assisted workflow request",
            {
                "stage_id": request.stage_id,
                "input_artifact_id": request.input_artifact_id,
            },
        )
@dataclass(frozen=True)
class WorkflowStageRecord:
stage_id: str
@@ -317,9 +372,9 @@ def _execute_workflow(
)
for input_record in selected_inputs:
data = _template_data(workflow, stage, input_record, stage_outputs)
template_text = _read_template(infospace.root, stage.template)
rendered = render_markdown_template(template_text, data)
if stage.kind == "template":
template_text = _read_template(infospace.root, stage.template)
rendered = render_markdown_template(template_text, data)
output = _resolve_output(
workflow,
stage,
@@ -334,6 +389,7 @@ def _execute_workflow(
"content": rendered.markdown,
"artifact_id": output.artifact_id,
"path": output.path,
"provider": "",
}
stages.append(
WorkflowStageRecord(
@@ -345,6 +401,8 @@ def _execute_workflow(
)
)
elif stage.kind == "assisted":
template_text = _read_template(infospace.root, stage.template)
rendered = render_markdown_template(template_text, data)
request = AssistedGenerationRequest(
stage_id=stage.id,
workflow_id=workflow.id,
@@ -386,6 +444,13 @@ def _execute_workflow(
provider=result.provider,
)
outputs.append(output)
stage_outputs[stage.id] = {
"content": result.markdown,
"artifact_id": output.artifact_id,
"path": output.path,
"provider": result.provider,
"metadata": result.metadata,
}
stages.append(
WorkflowStageRecord(
stage_id=stage.id,
@@ -395,6 +460,77 @@ def _execute_workflow(
output_artifact_id=output.artifact_id,
)
)
elif stage.kind == "split_entities":
bundle_stage = str(stage.static_macros.get("bundle_stage") or "")
if not bundle_stage:
raise InfospaceError(
"missing_split_bundle_stage",
"split_entities stage requires static_macros.bundle_stage",
{"workflow_id": workflow.id, "stage_id": stage.id},
)
bundle_output = stage_outputs.get(bundle_stage)
if bundle_output is None:
if dry_run:
stages.append(
WorkflowStageRecord(
stage_id=stage.id,
kind=stage.kind,
status="waiting_for_assisted_output",
input_artifact_id=input_record.artifact_id,
)
)
continue
raise InfospaceError(
"missing_split_bundle_output",
"split_entities stage could not find the source bundle output",
{
"workflow_id": workflow.id,
"stage_id": stage.id,
"bundle_stage": bundle_stage,
},
)
items = write_entity_bundle_artifacts(
infospace.root,
str(bundle_output.get("content") or ""),
workflow_id=workflow.id,
stage_id=stage.id,
input_artifact_id=input_record.artifact_id,
source_bundle_artifact_id=str(
bundle_output.get("artifact_id") or ""
),
provider=str(bundle_output.get("provider") or ""),
dry_run=dry_run,
)
for item in items:
outputs.append(
WorkflowOutputRecord(
stage_id=stage.id,
artifact_id=item.artifact_id,
path=item.path,
kind="entity",
title=item.title,
input_artifact_id=input_record.artifact_id,
written=not dry_run,
)
)
stage_outputs[stage.id] = {
"content": "\n".join(item.markdown for item in items),
"artifact_id": ",".join(item.artifact_id for item in items),
"path": ",".join(item.path for item in items),
"provider": str(bundle_output.get("provider") or ""),
}
stages.append(
WorkflowStageRecord(
stage_id=stage.id,
kind=stage.kind,
status="planned" if dry_run else "completed",
input_artifact_id=input_record.artifact_id,
output_artifact_id=",".join(
item.artifact_id for item in items
),
message=f"split {len(items)} entities",
)
)
else:
raise InfospaceError(
"unsupported_workflow_stage",
@@ -525,25 +661,26 @@ def _resolve_output(
if not dry_run:
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text(markdown, encoding="utf-8")
register_artifact(
root,
artifact_id=artifact_id,
path=output_path,
kind=stage.output.kind,
title=title,
provenance={
"workflow_id": workflow.id,
"stage_id": stage.id,
"input_artifact_id": input_record.artifact_id,
**({"provider": provider} if provider else {}),
},
relationships=[
{
"type": "generated_from",
"target": input_record.artifact_id,
}
],
)
if stage.output.kind != "evaluation":
register_artifact(
root,
artifact_id=artifact_id,
path=output_path,
kind=stage.output.kind,
title=title,
provenance={
"workflow_id": workflow.id,
"stage_id": stage.id,
"input_artifact_id": input_record.artifact_id,
**({"provider": provider} if provider else {}),
},
relationships=[
{
"type": "generated_from",
"target": input_record.artifact_id,
}
],
)
return WorkflowOutputRecord(
stage_id=stage.id,
artifact_id=artifact_id,