Files
infospace-bench/src/infospace_bench/generator.py
tegwick f818acfc62 IB-WP-0018-T03+T04: shadow sampling + report/CLI surfacing; close IB-WP-0018
T03 — wrap_with_shadow_sampling() helper in routing.py: builds a
llm-connect ShadowingAdapter around any candidate LLMAdapter with a
caller-supplied baseline, grader, and QualityLedger. async_shadow=True
by default so production load is not doubled; on_shadow_error escape
hatch keeps caller logs informed when a baseline outage swallows the
shadow path. The returned adapter is still an LLMAdapter so it slots
into a RoutingPolicy rule without further code change.

T04 — generation report enrichment plus a small CLI helper:

- _collect_adapter_choices walks artifact provenance, groups by
  (stage_id, adapter_id), and surfaces calls + prompt/completion tokens
  per (stage, adapter) pair in a new ## Per-stage adapter choices
  section. Runs that did not go through the bridge have no
  provider_metadata.adapter_id and emit an empty list, so fixture-only
  reports stay terse.
- summarise_quality_ledger() rolls a llm-connect QualityLedger up by
  (task_type, adapter_id) with mean quality, mean cost, observations,
  and cumulative tokens.
- infospace-bench routing ledger <path> CLI prints the rollup as JSON.

Five new tests cover shadow happy-path, shadow failure isolation,
ledger rollup, the routing CLI, and the report's adapter-choice
aggregation. Closes IB-WP-0018: T01-T05 are all done and the workplan
status flips from blocked to done now that LLM-WP-0004's primitives
have shipped.

144 tests pass, 1 skipped (the OpenRouter live smoke, gated as before).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-18 11:52:05 +02:00

1028 lines
37 KiB
Python

from __future__ import annotations
import hashlib
import shutil
import time
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
_monotonic = time.monotonic
import yaml
from .checks import run_collection_checks
from .errors import InfospaceError
from .evaluation_io import read_entity_evaluations
from .history import get_history, read_metrics_file, record_check_results
from .lifecycle import create_infospace, load_infospace, register_artifact
from .openrouter import OpenRouterAssistedGenerationAdapter
from .budget import (
emit_token_event,
latest_plan_snapshot_id,
make_cost_resolver,
read_run_variance,
record_plan_snapshot,
record_run_usage,
record_run_variance,
)
from .source_intake import SourceChunk, normalize_source
from .workflow import (
AssistedGenerationAdapter,
FixtureAssistedGenerationAdapter,
WorkflowRunResult,
plan_workflow,
run_workflow,
)
# Location of the persisted generation state, relative to the infospace root.
STATE_PATH = Path("output/workflows/generation-state.yaml")
# Profile used when neither the caller nor the stored state names one.
DEFAULT_PROFILE = "general-knowledge"
# Maps each accepted stage name to the workflow ids it runs, in order.
# Several names are aliases for the same workflow; "all" runs all four.
# The "intake" and "metrics" stages are not listed here: they are handled
# separately by _workflow_ids_for_stage and run no workflows.
WORKFLOW_BY_STAGE = {
    "summary": ["generic-source-summary"],
    "summarize": ["generic-source-summary"],
    "extract": ["generic-source-entities"],
    "entities": ["generic-source-entities"],
    "relations": ["generic-source-relations"],
    "evaluate": ["generic-source-evaluations"],
    "evaluation": ["generic-source-evaluations"],
    "all": [
        "generic-source-summary",
        "generic-source-entities",
        "generic-source-relations",
        "generic-source-evaluations",
    ],
}
@dataclass(frozen=True)
class GenerationRunResult:
    """Immutable outcome of one generation run.

    Carries the infospace root, the run status and requested stage, the
    skipped/stale flags, the per-workflow result dictionaries, the recorded
    metrics and the id of the history snapshot (empty when none was taken).
    """

    root: str
    status: str
    stage: str
    skipped: bool = False
    stale: bool = False
    workflows: list[dict[str, Any]] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)
    history_snapshot_id: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dict, leaving out empty strings, lists and dicts.

        Boolean ``False`` values are kept: only ``""``, ``[]`` and ``{}``
        count as empty.
        """
        empty_values = ("", [], {})
        compact: dict[str, Any] = {}
        for name, value in asdict(self).items():
            if value in empty_values:
                continue
            compact[name] = value
        return compact
def init_generation_infospace(
    workspace: str | Path,
    source: str | Path,
    slug: str,
    *,
    name: str,
    profile: str = DEFAULT_PROFILE,
    max_chunks: int | None = None,
    chapter_filter: list[str] | None = None,
    chunk_filter: list[str] | None = None,
    from_chapter: int | None = None,
    to_chapter: int | None = None,
) -> Any:
    """Create an infospace seeded with the normalized chunks of ``source``.

    The source is normalized (``max_chunks`` is forwarded to the normalizer),
    narrowed by the chapter/chunk filters, and the surviving chunks are
    registered as ``source`` artifacts. The profile is installed, the
    workflow definitions are written into the infospace config, and the
    initial generation state is persisted.

    Args:
        workspace: Directory the infospace is created in.
        source: Path of the document to import.
        slug: Identifier of the new infospace.
        name: Display name of the new infospace.
        profile: Name of the generation profile to install.
        max_chunks: Optional cap passed to ``normalize_source``.
        chapter_filter: Chapter labels or numbers (as strings) to keep.
        chunk_filter: Chunk ids to keep.
        from_chapter: Lowest chapter number to keep, inclusive.
        to_chapter: Highest chapter number to keep, inclusive.

    Returns:
        The freshly loaded infospace, as returned by ``load_infospace``.

    Raises:
        InfospaceError: ``empty_chapter_selection`` when the filters leave no
            chunk; raised before the infospace is created.
    """
    chunks = normalize_source(source, max_chunks=max_chunks)
    chunks = _filter_chunks_by_chapter(
        chunks,
        chapter_filter=chapter_filter,
        chunk_filter=chunk_filter,
        from_chapter=from_chapter,
        to_chapter=to_chapter,
    )
    if not chunks:
        raise InfospaceError(
            "empty_chapter_selection",
            "Selection filters excluded every chunk; nothing to register",
            {
                "chapter_filter": chapter_filter,
                "chunk_filter": chunk_filter,
                "from_chapter": from_chapter,
                "to_chapter": to_chapter,
            },
        )
    infospace = create_infospace(Path(workspace), slug, name=name)
    _install_profile(infospace.root, profile)
    _write_workflows(infospace.root, profile)
    _register_source_chunks(infospace.root, chunks)
    source_chunks = _source_state(infospace.root)
    profile_digest = _profile_digest(infospace.root, profile)
    # Read the clock once so a brand-new state has created_at == updated_at;
    # two separate reads would record two slightly different instants.
    created_at = _now()
    _write_state(
        infospace.root,
        {
            "profile": profile,
            "source": str(Path(source)),
            "source_chunks": source_chunks,
            "profile_digest": profile_digest,
            "stage_status": {},
            "completed": False,
            "created_at": created_at,
            "updated_at": created_at,
        },
    )
    return load_infospace(infospace.root)
# Default words-per-token ratio; plan_generation_summary divides the prompt
# word estimate by this value to obtain a token estimate.
WORDS_PER_TOKEN_DEFAULT = 0.75
# Default number of entities assumed per source chunk when estimating the
# number of evaluation calls.
ENTITIES_PER_CHUNK_ESTIMATE = 2
# Provider calls issued per selected chunk for each non-evaluation workflow.
# Workflows missing from this table are estimated at zero calls.
_CALLS_PER_CHUNK_BY_WORKFLOW = {
    "generic-source-summary": 1,
    "generic-source-entities": 1,
    "generic-source-relations": 1,
}
def plan_generation(
    root: str | Path,
    *,
    stage: str = "all",
    chapter_filter: list[str] | None = None,
    chunk_filter: list[str] | None = None,
    from_chapter: int | None = None,
    to_chapter: int | None = None,
    max_calls: int | None = None,
    cost_cap: float | None = None,
    cost_per_1k_tokens: float = 0.0,
    words_per_token: float = WORDS_PER_TOKEN_DEFAULT,
    entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE,
    full: bool = False,
    persist: bool = True,
) -> dict[str, Any]:
    """Estimate a generation run and optionally record the plan.

    Builds the summary from ``plan_generation_summary``, annotates it with
    the selection filters, the root, the current staleness flag and the
    status ``"planned"``. With ``persist`` the summary is recorded as a plan
    snapshot and its id stored under ``snapshot_id``. With ``full`` each
    workflow of the stage is planned as well and listed under ``workflows``;
    a workflow whose planning raises ``InfospaceError`` is listed as
    ``"blocked"`` together with the error payload.

    Returns:
        The plan summary dictionary.
    """
    root_path = Path(root)
    status = status_generation(root_path)
    summary = plan_generation_summary(
        root_path,
        stage=stage,
        chapter_filter=chapter_filter,
        chunk_filter=chunk_filter,
        from_chapter=from_chapter,
        to_chapter=to_chapter,
        max_calls=max_calls,
        cost_cap=cost_cap,
        cost_per_1k_tokens=cost_per_1k_tokens,
        words_per_token=words_per_token,
        entities_per_chunk=entities_per_chunk,
    )
    summary.update(
        {
            "chapter_filter": list(chapter_filter) if chapter_filter else None,
            "chunk_filter": list(chunk_filter) if chunk_filter else None,
            "from_chapter": from_chapter,
            "to_chapter": to_chapter,
            "root": str(root_path),
            "stale": status["stale"],
            "status": "planned",
        }
    )
    if persist:
        summary["snapshot_id"] = record_plan_snapshot(root_path, summary)
    if not full:
        return summary

    def _plan_or_blocked(workflow_id: str) -> dict[str, Any]:
        # A workflow that cannot be planned is reported, not propagated.
        try:
            return plan_workflow(root_path, workflow_id).to_dict()
        except InfospaceError as exc:
            return {
                "workflow_id": workflow_id,
                "status": "blocked",
                "error": exc.to_dict(),
            }

    summary["workflows"] = [
        _plan_or_blocked(workflow_id)
        for workflow_id in _workflow_ids_for_stage(stage)
    ]
    return summary
def plan_generation_summary(
    root: str | Path,
    *,
    stage: str = "all",
    chapter_filter: list[str] | None = None,
    chunk_filter: list[str] | None = None,
    from_chapter: int | None = None,
    to_chapter: int | None = None,
    max_calls: int | None = None,
    cost_cap: float | None = None,
    cost_per_1k_tokens: float = 0.0,
    words_per_token: float = WORDS_PER_TOKEN_DEFAULT,
    entities_per_chunk: int = ENTITIES_PER_CHUNK_ESTIMATE,
) -> dict[str, Any]:
    """Estimate provider calls, prompt size and cost for a stage.

    The estimate covers the source chunks selected by the filters. For the
    evaluation workflow the call count is ``chunks * entities_per_chunk`` and
    each call is sized as the evaluation template plus a fixed per-entity
    word allowance. Every other workflow is sized as its template once per
    call plus the words of all selected chunks. Tokens are derived from
    words via ``words_per_token``; a cost is only computed when
    ``cost_per_1k_tokens`` is positive.

    Returns:
        A dictionary with the selection, per-workflow rows, totals, the
        echoed limits and the ``exceeds_*`` flags.
    """
    root_path = Path(root)
    infospace = load_infospace(root_path)
    sources = [artifact for artifact in infospace.artifacts if artifact.kind == "source"]
    selected = _select_source_chunks(
        sources,
        chapter_filter=chapter_filter,
        chunk_filter=chunk_filter,
        from_chapter=from_chapter,
        to_chapter=to_chapter,
    )
    workflow_ids = _workflow_ids_for_stage(stage)
    template_words = _profile_template_words(root_path, _read_profile_name(root_path))
    chunk_word_total = sum(_source_word_count(root_path, artifact) for artifact in selected)
    selected_count = len(selected)
    entity_words_estimate = 80  # assumed size of one entity in an evaluation prompt

    per_stage: list[dict[str, Any]] = []
    for workflow_id in workflow_ids:
        if workflow_id == "generic-source-evaluations":
            calls = selected_count * max(0, entities_per_chunk)
            words_per_call = template_words.get("evaluate-entity", 0) + entity_words_estimate
            prompt_words = calls * words_per_call
        else:
            calls = selected_count * _CALLS_PER_CHUNK_BY_WORKFLOW.get(workflow_id, 0)
            template_label = _template_for_workflow(workflow_id)
            prompt_words = calls * template_words.get(template_label, 0)
            if calls:
                # The chunk text is only counted when the workflow runs at all.
                prompt_words += chunk_word_total
        per_stage.append(
            {
                "workflow_id": workflow_id,
                "calls": calls,
                "prompt_words_estimate": prompt_words,
            }
        )

    total_calls = sum(row["calls"] for row in per_stage)
    total_prompt_words = sum(row["prompt_words_estimate"] for row in per_stage)

    if words_per_token > 0:
        total_tokens = int(round(total_prompt_words / words_per_token))
    else:
        total_tokens = 0

    cost: float | None = None
    if cost_per_1k_tokens > 0:
        cost = round((total_tokens / 1000.0) * cost_per_1k_tokens, 4)

    numbers_seen = set()
    for artifact in selected:
        number = artifact.provenance.get("chapter_number")
        if isinstance(number, int):
            numbers_seen.add(int(number))
    chapter_numbers = sorted(numbers_seen)

    over_calls = max_calls is not None and total_calls > max_calls
    over_cost = cost_cap is not None and cost is not None and cost > cost_cap
    return {
        "stage": stage,
        "source_chunk_count": len(sources),
        "selected_chunk_count": selected_count,
        "selected_chunk_ids": [
            artifact.id.split("/", 1)[-1].rsplit(".md", 1)[0] for artifact in selected
        ],
        "selected_chapter_numbers": chapter_numbers,
        "per_workflow": per_stage,
        "total_provider_calls_estimate": total_calls,
        "total_prompt_words_estimate": total_prompt_words,
        "total_prompt_tokens_estimate": total_tokens,
        "estimated_cost_usd": cost,
        "cost_per_1k_tokens": cost_per_1k_tokens or None,
        "words_per_token": words_per_token,
        "entities_per_chunk_estimate": entities_per_chunk,
        "max_calls": max_calls,
        "cost_cap": cost_cap,
        "exceeds_max_calls": bool(over_calls),
        "exceeds_cost_cap": bool(over_cost),
    }
def _filter_chunks_by_chapter(
    chunks: list[SourceChunk],
    *,
    chapter_filter: list[str] | None,
    chunk_filter: list[str] | None,
    from_chapter: int | None,
    to_chapter: int | None,
) -> list[SourceChunk]:
    """Return the chunks that pass every active selection filter.

    ``chunk_filter`` matches chunk ids exactly. ``chapter_filter`` matches
    either the lower-cased chapter label or the chapter number written as a
    string. ``from_chapter`` / ``to_chapter`` form an inclusive range and
    drop chunks without a chapter number. Blank filter entries are ignored;
    with no active filter a copy of ``chunks`` is returned.
    """
    wanted_ids = {raw.strip() for raw in (chunk_filter or []) if raw.strip()}
    wanted_labels = {raw.strip().lower() for raw in (chapter_filter or []) if raw.strip()}
    range_active = from_chapter is not None or to_chapter is not None
    if not (wanted_ids or wanted_labels or range_active):
        return list(chunks)

    def _keep(chunk: SourceChunk) -> bool:
        if wanted_ids and chunk.chunk_id not in wanted_ids:
            return False
        if wanted_labels:
            label = (chunk.chapter_label or "").strip().lower()
            by_number = (
                chunk.chapter_number is not None
                and str(chunk.chapter_number) in wanted_labels
            )
            by_label = bool(label) and label in wanted_labels
            if not (by_number or by_label):
                return False
        if range_active:
            number = chunk.chapter_number
            if number is None:
                return False
            if from_chapter is not None and number < from_chapter:
                return False
            if to_chapter is not None and number > to_chapter:
                return False
        return True

    return [chunk for chunk in chunks if _keep(chunk)]
def _select_source_chunks(
    sources: list[Any],
    *,
    chapter_filter: list[str] | None,
    chunk_filter: list[str] | None,
    from_chapter: int | None,
    to_chapter: int | None,
) -> list[Any]:
    """Return the registered source artifacts that pass the selection filters.

    Works on artifact provenance rather than on raw chunks. The chunk id is
    read from ``provenance["chunk_id"]`` and otherwise derived from the
    artifact id. Chapter numbers only take part in matching when they are
    integers; the chapter range drops artifacts without an integer number.
    """
    wanted_ids = {raw.strip() for raw in (chunk_filter or []) if raw.strip()}
    wanted_labels = {raw.strip().lower() for raw in (chapter_filter or []) if raw.strip()}
    range_active = from_chapter is not None or to_chapter is not None

    def _keep(artifact: Any) -> bool:
        chunk_id = (
            artifact.provenance.get("chunk_id")
            or artifact.id.split("/", 1)[-1].rsplit(".md", 1)[0]
        )
        if wanted_ids and chunk_id not in wanted_ids:
            return False
        number = artifact.provenance.get("chapter_number")
        label = (artifact.provenance.get("chapter_label") or "").strip().lower()
        has_number = isinstance(number, int)
        if wanted_labels:
            by_number = has_number and str(number) in wanted_labels
            by_label = bool(label) and label in wanted_labels
            if not (by_number or by_label):
                return False
        if range_active:
            if not has_number:
                return False
            if from_chapter is not None and number < from_chapter:
                return False
            if to_chapter is not None and number > to_chapter:
                return False
        return True

    return [artifact for artifact in sources if _keep(artifact)]
def _template_for_workflow(workflow_id: str) -> str:
    """Return the template name used by ``workflow_id``, or ``""`` if unknown."""
    known_templates = (
        ("generic-source-summary", "summarize-source"),
        ("generic-source-entities", "extract-entities"),
        ("generic-source-relations", "extract-relations"),
        ("generic-source-evaluations", "evaluate-entity"),
    )
    for candidate, template in known_templates:
        if candidate == workflow_id:
            return template
    return ""
def _profile_template_words(root: Path, profile: str) -> dict[str, int]:
    """Count the whitespace-separated words of each installed profile template.

    Looks at ``<root>/profiles/<profile>/templates/*.md`` and returns a
    mapping from template stem to word count. The result feeds a size
    estimate, so the function is best-effort: a missing directory yields an
    empty mapping and a template that cannot be read or is not valid UTF-8
    is left out instead of aborting the plan.
    """
    template_dir = Path(root) / "profiles" / profile / "templates"
    counts: dict[str, int] = {}
    if not template_dir.is_dir():
        return counts
    for path in template_dir.glob("*.md"):
        try:
            text = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # UnicodeDecodeError is a ValueError, not an OSError, so it has
            # to be named explicitly to keep the skip-on-failure behavior.
            continue
        counts[path.stem] = len(text.split())
    return counts
def _source_word_count(root: Path, artifact: Any) -> int:
    """Return the whitespace-separated word count of an artifact's file.

    ``artifact.path`` is resolved against ``root``. The count feeds a size
    estimate, so a file that cannot be read or is not valid UTF-8 counts as
    zero words instead of raising.
    """
    path = Path(root) / artifact.path
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is not an OSError; without it an undecodable
        # file would abort the whole estimate.
        return 0
    return len(text.split())
def _read_profile_name(root: Path) -> str:
    """Return the profile stored in the generation state, or the default."""
    stored_profile = _read_state(root).get("profile")
    if not stored_profile:
        stored_profile = DEFAULT_PROFILE
    return str(stored_profile)
def _format_variance_line(summary: dict[str, Any] | None) -> str:
    """Render the plan-versus-actual summary as one markdown bullet.

    Reports actual/estimated calls and actual/estimated cost. Without an
    estimate the calls are marked ``(no plan)`` and the cost is shown only
    when it is positive. Returns ``""`` when there is nothing to report.
    """
    if not summary:
        return ""
    call_info = summary.get("calls") or {}
    cost_info = summary.get("cost_usd") or {}
    segments: list[str] = []

    actual_calls = call_info.get("actual")
    if actual_calls is not None:
        planned_calls = call_info.get("estimated")
        if planned_calls is None:
            segments.append(f"calls {actual_calls} (no plan)")
        else:
            segments.append(f"calls {actual_calls}/{planned_calls}")

    spent = cost_info.get("actual_total")
    if spent is not None:
        planned_cost = cost_info.get("estimated")
        if planned_cost is not None:
            segments.append(f"cost ${spent:.4f}/${planned_cost:.4f}")
        elif spent > 0:
            segments.append(f"cost ${spent:.4f}")

    return "- " + " · ".join(segments) if segments else ""
def _workspace_for(root: Path) -> Path:
    """Return the workspace directory that holds this infospace.

    Infospaces live at ``<workspace>/infospaces/<slug>``, which puts the
    workspace two directory levels above ``root``.
    """
    infospaces_dir = root.parent
    return infospaces_dir.parent
def run_generation(
    root: str | Path,
    *,
    stage: str = "all",
    provider: str = "fixture",
    model: str = "",
    fixture_responses: str | Path | None = None,
    resume: bool = False,
    force: bool = False,
) -> GenerationRunResult:
    """Run the workflows of ``stage`` and record usage, metrics and state.

    With ``resume`` (and without ``force``) a run that the stored state marks
    as completed and that is not stale is skipped. Otherwise every workflow
    of the stage runs through one adapter and, when at least one ran, usage,
    plan variance and a token event are recorded. The ``all`` and
    ``metrics`` stages also record metrics and write the generation report.
    The generation state is rewritten at the end.

    Returns:
        A ``GenerationRunResult`` with status ``"skipped"`` or ``"completed"``.
    """
    root_path = Path(root)
    stage_key = stage.strip().lower()
    state = _read_state(root_path)
    status = status_generation(root_path)
    workflow_ids = _workflow_ids_for_stage(stage_key)

    may_skip = resume and not force
    if may_skip and state.get("completed") is True and not status["stale"]:
        return GenerationRunResult(
            root=str(root_path),
            status="skipped",
            stage=stage,
            skipped=True,
            stale=False,
            workflows=[],
            metrics=status.get("metrics", {}),
        )

    started_wall = datetime.now(timezone.utc)
    monotonic_start = _monotonic()

    # Stages without workflows ("intake", "metrics") need no adapter.
    adapter = None
    if workflow_ids:
        adapter = _adapter_for(provider, model=model, fixture_responses=fixture_responses)

    workflow_results: list[dict[str, Any]] = []
    for workflow_id in workflow_ids:
        outcome = run_workflow(root_path, workflow_id, assisted_adapter=adapter)
        workflow_results.append(outcome.to_dict())
        state = _mark_workflow_completed(state, outcome)

    if workflow_results:
        elapsed_seconds = round(_monotonic() - monotonic_start, 3)
        plan_snapshot_id = latest_plan_snapshot_id(root_path)
        workspace = _workspace_for(root_path)
        usage_entry = record_run_usage(
            root_path,
            workflow_results,
            snapshot_id=plan_snapshot_id,
            duration_seconds=elapsed_seconds,
            started_at=started_wall.isoformat(),
            cost_resolver=make_cost_resolver(workspace),
        )
        record_run_variance(root_path, usage_entry)
        emit_token_event(
            usage_entry,
            infospace_slug=load_infospace(root_path).config.slug,
            workspace=str(workspace),
        )

    records_metrics = stage_key in {"all", "metrics"}
    metrics: dict[str, Any] = {}
    snapshot_id = ""
    if records_metrics:
        check_result = _record_metrics(root_path)
        metrics = check_result.metrics
        snapshot_id = check_result.snapshot.snapshot_id
        _write_generation_report(root_path, metrics, snapshot_id)

    profile = str(state.get("profile") or DEFAULT_PROFILE)
    state["source_chunks"] = _source_state(root_path)
    state["profile_digest"] = _profile_digest(root_path, profile)
    state["completed"] = records_metrics
    state["updated_at"] = _now()
    state["last_run"] = {
        "stage": stage,
        "provider": provider,
        "model": model,
        "workflow_count": len(workflow_results),
        "snapshot_id": snapshot_id,
        "completed_at": _now(),
    }
    _write_state(root_path, state)

    return GenerationRunResult(
        root=str(root_path),
        status="completed",
        stage=stage,
        skipped=False,
        stale=False,
        workflows=workflow_results,
        metrics=metrics,
        history_snapshot_id=snapshot_id,
    )
def status_generation(root: str | Path) -> dict[str, Any]:
    """Report the current generation status of an infospace.

    Combines artifact counts per kind, the evaluation count, the stored
    metrics, history information, staleness of sources and profile, the
    completion flag and per-stage status from the generation state, and the
    recorded budget variance. The profile only counts as stale when the
    state holds a digest that differs from the current one.
    """
    infospace = load_infospace(Path(root))
    info_root = infospace.root
    state = _read_state(Path(root))
    stale_sources = _stale_source_ids(info_root)
    profile = str(state.get("profile") or DEFAULT_PROFILE)

    recorded_digest = state.get("profile_digest")
    stale_profile = bool(
        recorded_digest and recorded_digest != _profile_digest(info_root, profile)
    )
    evaluations = read_entity_evaluations(info_root / "output" / "evaluations")
    history = get_history(info_root)

    def _count(kind: str) -> int:
        return sum(1 for artifact in infospace.artifacts if artifact.kind == kind)

    latest_snapshot_id = history[-1].snapshot_id if history else ""
    return {
        "root": str(info_root),
        "slug": infospace.config.slug,
        "profile": profile,
        "source_chunk_count": _count("source"),
        "entity_count": _count("entity"),
        "relation_count": _count("relation"),
        "evaluation_count": len(evaluations),
        "generated_count": _count("generated"),
        "metrics": read_metrics_file(info_root / "output" / "metrics" / "metrics.yaml"),
        "history_snapshot_count": len(history),
        "latest_snapshot_id": latest_snapshot_id,
        "stale": bool(stale_sources or stale_profile),
        "stale_sources": stale_sources,
        "stale_profile": stale_profile,
        "completed": bool(state.get("completed", False)),
        "stage_status": state.get("stage_status", {}),
        "budget_summary": read_run_variance(info_root),
    }
def _adapter_for(
    provider: str,
    *,
    model: str,
    fixture_responses: str | Path | None,
) -> AssistedGenerationAdapter:
    """Build the assisted-generation adapter for a run.

    A fixture file takes precedence over the provider. Without one, only the
    ``openrouter`` provider is supported.

    Raises:
        InfospaceError: ``missing_assisted_generation_adapter`` when neither
            a fixture file nor the ``openrouter`` provider is given.
    """
    if fixture_responses:
        fixture_path = Path(fixture_responses)
        return FixtureAssistedGenerationAdapter.from_file(fixture_path)
    if provider != "openrouter":
        raise InfospaceError(
            "missing_assisted_generation_adapter",
            "Assisted generation requires --fixture-responses or --provider openrouter",
            {"provider": provider},
        )
    return OpenRouterAssistedGenerationAdapter(model=model)
def _register_source_chunks(root: Path, chunks: list[SourceChunk]) -> None:
    """Write each chunk to ``artifacts/sources`` and register it as a source.

    The chunk's markdown is stored as ``<chunk_id>.md`` and registered under
    the artifact id ``source/<chunk_id>.md`` with the chunk's import,
    chapter and anchor details as provenance.
    """
    sources_dir = root / "artifacts" / "sources"
    for chunk in chunks:
        target = sources_dir / f"{chunk.chunk_id}.md"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(chunk.markdown, encoding="utf-8")
        provenance = {
            "original_path": chunk.original_path,
            "source_type": chunk.source_type,
            "digest": chunk.digest,
            "chunk_id": chunk.chunk_id,
            "chunk_index": chunk.chunk_index,
            "chunk_count": chunk.chunk_count,
            "imported_at": chunk.imported_at,
            "extractor_version": chunk.extractor_version,
            "section_role": chunk.section_role,
            "spine_index": chunk.spine_index,
            # Copies, so the registered provenance does not alias the chunk.
            "book_metadata": dict(chunk.book_metadata),
            "chapter_label": chunk.chapter_label,
            "chapter_number": chunk.chapter_number,
            "page_anchors": list(chunk.page_anchors),
        }
        register_artifact(
            root,
            artifact_id=f"source/{chunk.chunk_id}.md",
            path=target,
            kind="source",
            title=chunk.title,
            provenance=provenance,
        )
def _install_profile(root: Path, profile: str) -> None:
    """Copy the bundled profile and its templates into the infospace.

    The profile directory shipped next to this module is copied to
    ``<root>/profiles/<profile>`` and its ``templates`` subdirectory to
    ``<root>/workflows/templates/<profile>``; existing targets are merged.

    Raises:
        InfospaceError: ``missing_generation_profile`` when no bundled
            profile of that name exists.
    """
    bundled = Path(__file__).parent / "profiles" / profile
    if bundled.is_dir():
        shutil.copytree(bundled, root / "profiles" / profile, dirs_exist_ok=True)
        shutil.copytree(
            bundled / "templates",
            root / "workflows" / "templates" / profile,
            dirs_exist_ok=True,
        )
        return
    raise InfospaceError(
        "missing_generation_profile",
        f"Generation profile does not exist: {profile}",
        {"profile": profile, "path": str(bundled)},
    )
def _write_workflows(root: Path, profile: str) -> None:
    """Point ``infospace.yaml`` at the profile's contracts and workflows.

    Existing schema entries are kept; the entity, relation and evaluation
    schemas are set to the profile's contract files and the workflow list is
    replaced by the profile's workflows.
    """
    config_path = root / "infospace.yaml"
    config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}

    schemas = dict(config.get("schemas") or {})
    for kind in ("entity", "relation", "evaluation"):
        schemas[kind] = f"profiles/{profile}/contracts/{kind}.contract.md"
    config["schemas"] = schemas
    config["workflows"] = _profile_workflows(profile)

    rendered = yaml.safe_dump(config, sort_keys=False)
    config_path.write_text(rendered, encoding="utf-8")
def _profile_workflows(profile: str) -> list[dict[str, Any]]:
    """Return the four workflow definitions for ``profile``.

    Covers source summaries, entity extraction (followed by a split stage),
    relation extraction and entity evaluation. Every call builds fresh
    dictionaries, so no object is shared between workflows.
    """
    base = f"workflows/templates/{profile}"

    def _assisted_stage(
        stage_id: str,
        input_name: str,
        output_path: str,
        artifact_id: str,
        output_kind: str,
        title_suffix: str,
    ) -> dict[str, Any]:
        # One provider-backed stage; the template file is named after the stage.
        return {
            "id": stage_id,
            "kind": "assisted",
            "input": input_name,
            "template": f"{base}/{stage_id}.md",
            "provider_hint": "openrouter",
            "output": {
                "path": output_path,
                "artifact_id": artifact_id,
                "kind": output_kind,
                "title": "{{ input.title }} " + title_suffix,
            },
        }

    def _workflow(
        workflow_id: str,
        description: str,
        input_name: str,
        stages: list[dict[str, Any]],
    ) -> dict[str, Any]:
        # The input slot is named after the artifact kind it consumes.
        return {
            "id": workflow_id,
            "description": description,
            "inputs": {input_name: {"kind": input_name}},
            "static_macros": {"profile": profile},
            "stages": stages,
        }

    split_entities_stage = {
        "id": "split-entities",
        "kind": "split_entities",
        "input": "source",
        "template": "",
        "static_macros": {"bundle_stage": "extract-entities"},
    }

    return [
        _workflow(
            "generic-source-summary",
            "Summarize normalized source chunks.",
            "source",
            [
                _assisted_stage(
                    "summarize-source",
                    "source",
                    "artifacts/generated/{{ input.slug }}-summary.md",
                    "generated/{{ input.slug }}-summary.md",
                    "generated",
                    "Summary",
                )
            ],
        ),
        _workflow(
            "generic-source-entities",
            "Extract reusable entity artifacts from source chunks.",
            "source",
            [
                _assisted_stage(
                    "extract-entities",
                    "source",
                    "artifacts/generated/{{ input.slug }}-entities.md",
                    "generated/{{ input.slug }}-entities.md",
                    "generated",
                    "Entity Bundle",
                ),
                split_entities_stage,
            ],
        ),
        _workflow(
            "generic-source-relations",
            "Extract relation artifacts from source chunks.",
            "source",
            [
                _assisted_stage(
                    "extract-relations",
                    "source",
                    "artifacts/relations/{{ input.slug }}-relations.md",
                    "relation/{{ input.slug }}-relations.md",
                    "relation",
                    "Relations",
                )
            ],
        ),
        _workflow(
            "generic-source-evaluations",
            "Evaluate generated entities with the profile rubric.",
            "entity",
            [
                _assisted_stage(
                    "evaluate-entity",
                    "entity",
                    "output/evaluations/{{ input.slug }}.md",
                    "generated/evaluation-{{ input.slug }}.md",
                    "generated",
                    "Evaluation",
                )
            ],
        ),
    ]
def _record_metrics(root: Path) -> Any:
    """Run the collection checks and record the results in the history.

    Returns whatever ``record_check_results`` returns; the caller reads its
    ``metrics`` and ``snapshot`` attributes.
    """
    infospace = load_infospace(root)
    info_root = infospace.root
    check_results = run_collection_checks(infospace.artifacts)
    evaluations = read_entity_evaluations(info_root / "output" / "evaluations")
    return record_check_results(
        info_root,
        check_results,
        artifact_evaluations=evaluations,
        schema_name="generic-source",
        metadata={"generator": "generic-source"},
    )
def _write_generation_report(root: Path, metrics: dict[str, Any], snapshot_id: str) -> None:
    """Write ``reports/generation-summary.md`` and register it as an artifact.

    The report opens with the snapshot id, the artifact counts and the
    metrics sorted by name. The remaining sections (plan variance, chapter
    coverage, entities, unmapped source chunks, page anchors and per-stage
    adapter choices) only appear when there is something to show.
    """
    status = status_generation(root)
    review = _collect_review_report(root)

    lines: list[str] = [
        "# Generation Report",
        "",
        f"Snapshot: {snapshot_id}",
        f"Sources: {status['source_chunk_count']}",
        f"Entities: {status['entity_count']}",
        f"Relations: {status['relation_count']}",
        f"Evaluations: {status['evaluation_count']}",
        "",
        "## Metrics",
        "",
    ]
    lines += [f"- {name}: {value}" for name, value in sorted(metrics.items())]
    lines.append("")

    variance_line = _format_variance_line(status.get("budget_summary"))
    if variance_line:
        lines += ["## Plan variance", "", variance_line, ""]

    coverage_rows = review["chapter_coverage"]
    if coverage_rows:
        lines += ["## Chapter coverage", ""]
        for row in coverage_rows:
            number = row["chapter_number"]
            number_text = f"{number:02d}" if isinstance(number, int) else ""
            label = row["chapter_label"] or ""
            lines.append(
                f"- Chapter {number_text} ({label}): "
                f"{row['source_count']} source chunk(s), "
                f"{row['entity_count']} entity, "
                f"{row['relation_count']} relation, "
                f"{row['anchor_count']} page anchor"
            )
        lines.append("")

    entity_titles = review["entity_titles"]
    if entity_titles:
        lines += ["## Entities", ""]
        lines.extend(f"- {title}" for title in entity_titles)
        lines.append("")

    unmapped_sources = review["unmapped_sources"]
    if unmapped_sources:
        lines += [
            "## Unmapped source chunks",
            "",
            "These source chunks have no generated artifact pointing back to "
            "them. Re-run the missing stages or annotate the gap before "
            "scaling beyond the current selection.",
            "",
        ]
        lines.extend(f"- `{chunk_id}`" for chunk_id in unmapped_sources)
        lines.append("")

    if review["page_anchor_total"]:
        lines += [
            "## Page anchors",
            "",
            f"- Total distinct anchors: {review['page_anchor_total']}",
            f"- Sample: {', '.join(review['page_anchor_sample'])}",
            "",
        ]

    adapter_rows = review.get("adapter_choices")
    if adapter_rows:
        lines += ["## Per-stage adapter choices", ""]
        for row in adapter_rows:
            lines.append(
                f"- `{row['stage_id']}` ({row['task_type']}) -> "
                f"`{row['adapter_id']}` · {row['calls']} call(s) · "
                f"{row['prompt_tokens']} prompt + {row['completion_tokens']} completion tokens"
            )
        lines.append("")

    report_path = root / "reports" / "generation-summary.md"
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text("\n".join(lines), encoding="utf-8")
    register_artifact(
        root,
        artifact_id="generated/generation-summary.md",
        path=report_path,
        kind="generated",
        title="Generation Summary",
        provenance={"workflow_id": "generic-source-generator", "snapshot_id": snapshot_id},
    )
def _collect_review_report(root: Path) -> dict[str, Any]:
    """Assemble the review data shown in the generation report.

    Returns a dictionary with:

    * ``chapter_coverage``: one row per chapter with source, entity,
      relation and page-anchor counts; chapters with an integer number come
      first in numeric order, the rest follow sorted by label.
    * ``entity_titles``: the sorted distinct titles of entity artifacts.
    * ``unmapped_sources``: chunk ids of sources that no other artifact
      references through a ``generated_from`` relationship.
    * ``page_anchor_total`` / ``page_anchor_sample``: the number of distinct
      page anchors and the first six in encounter order.
    * ``adapter_choices``: the per-stage adapter rollup of the non-source
      artifacts.
    """
    infospace = load_infospace(root)
    sources: list[Any] = []
    generated: list[Any] = []
    for artifact in infospace.artifacts:
        (sources if artifact.kind == "source" else generated).append(artifact)

    # Index the non-source artifacts by the source they were generated from.
    downstream_by_source: dict[str, list[Any]] = {}
    for artifact in generated:
        for rel in artifact.relationships or []:
            target = str(rel.get("target") or "")
            if rel.get("type") == "generated_from" and target:
                downstream_by_source.setdefault(target, []).append(artifact)

    count_field_by_kind = {"entity": "entity_count", "relation": "relation_count"}
    chapter_rows: dict[Any, dict[str, Any]] = {}
    anchors: list[str] = []
    seen_anchors: set[str] = set()
    unmapped: list[str] = []

    for source in sources:
        provenance = source.provenance or {}
        chapter_number = provenance.get("chapter_number")
        chapter_label = provenance.get("chapter_label") or ""
        page_anchors = provenance.get("page_anchors") or []

        # Sources without a chapter number are grouped by label, or stand
        # alone under their own id when they have no label either.
        if chapter_number is not None:
            row_key = chapter_number
        else:
            row_key = f"_label:{chapter_label or source.id}"
        row = chapter_rows.setdefault(
            row_key,
            {
                "chapter_number": chapter_number,
                "chapter_label": chapter_label,
                "source_count": 0,
                "entity_count": 0,
                "relation_count": 0,
                "anchor_count": 0,
            },
        )
        row["source_count"] += 1
        row["anchor_count"] += len(page_anchors)

        downstream = downstream_by_source.get(source.id, [])
        if not downstream:
            unmapped.append(
                provenance.get("chunk_id")
                or source.id.split("/", 1)[-1].rsplit(".md", 1)[0]
            )
        for artifact in downstream:
            count_field = count_field_by_kind.get(artifact.kind)
            if count_field:
                row[count_field] += 1

        for anchor in page_anchors:
            if anchor in seen_anchors:
                continue
            seen_anchors.add(anchor)
            anchors.append(anchor)

    def _row_order(row: dict[str, Any]) -> tuple[int, int, str]:
        number = row.get("chapter_number")
        if isinstance(number, int):
            return (0, number, "")
        return (1, 0, row.get("chapter_label") or "")

    titles = {
        artifact.title
        for artifact in infospace.artifacts
        if artifact.kind == "entity" and artifact.title
    }
    return {
        "chapter_coverage": sorted(chapter_rows.values(), key=_row_order),
        "entity_titles": sorted(titles),
        "unmapped_sources": unmapped,
        "page_anchor_total": len(anchors),
        "page_anchor_sample": anchors[:6],
        "adapter_choices": _collect_adapter_choices(generated),
    }
def _coerce_token_count(value: Any) -> int:
    """Return ``value`` as an int, or 0 when it is missing or not numeric."""
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        return 0


def _collect_adapter_choices(generated: list[Any]) -> list[dict[str, Any]]:
    """Roll up which adapter produced the artifacts of each stage.

    Reads ``provenance["provider_metadata"]`` of every artifact. The adapter
    is ``adapter_id`` or, failing that, ``model``; the stage is ``stage_id``
    from the metadata or from the provenance. Artifacts without usable
    metadata, adapter or stage are skipped, so runs that record none of this
    yield an empty list.

    Usage figures are treated as optional: a ``usage`` entry that is not a
    mapping, or a token value that is not numeric, counts as zero tokens
    instead of failing the whole report.

    Returns:
        One row per ``(stage_id, adapter_id)`` with ``task_type`` (taken from
        the first artifact seen for the pair, defaulting to the stage id),
        ``calls``, ``prompt_tokens`` and ``completion_tokens``, sorted by
        stage id and adapter id.
    """
    buckets: dict[tuple[str, str], dict[str, Any]] = {}
    for item in generated:
        provenance = item.provenance or {}
        metadata = provenance.get("provider_metadata") or {}
        if not isinstance(metadata, dict):
            continue
        adapter_id = str(metadata.get("adapter_id") or metadata.get("model") or "")
        if not adapter_id:
            continue
        stage_id = str(metadata.get("stage_id") or provenance.get("stage_id") or "")
        if not stage_id:
            continue
        usage = metadata.get("usage")
        if not isinstance(usage, dict):
            # Same tolerance as for the metadata itself: a malformed entry
            # still counts as a call, only without token figures.
            usage = {}
        bucket = buckets.setdefault(
            (stage_id, adapter_id),
            {
                "stage_id": stage_id,
                "adapter_id": adapter_id,
                "task_type": metadata.get("task_type") or stage_id,
                "calls": 0,
                "prompt_tokens": 0,
                "completion_tokens": 0,
            },
        )
        bucket["calls"] += 1
        bucket["prompt_tokens"] += _coerce_token_count(usage.get("prompt_tokens"))
        bucket["completion_tokens"] += _coerce_token_count(usage.get("completion_tokens"))
    return sorted(buckets.values(), key=lambda row: (row["stage_id"], row["adapter_id"]))
def _workflow_ids_for_stage(stage: str) -> list[str]:
    """Return the workflow ids to run for ``stage``.

    The stage name is matched case-insensitively with surrounding whitespace
    ignored. ``intake`` and ``metrics`` are valid stages that run no
    workflow. The result is always a new list, so callers may modify it
    without affecting ``WORKFLOW_BY_STAGE``.

    Raises:
        InfospaceError: ``invalid_generation_stage`` for an unknown stage;
            the payload lists the valid stage names.
    """
    normalized = stage.strip().lower()
    if normalized in ("intake", "metrics"):
        return []
    if normalized not in WORKFLOW_BY_STAGE:
        raise InfospaceError(
            "invalid_generation_stage",
            f"Unsupported generation stage: {stage}",
            {
                "stage": stage,
                "valid_stages": sorted([*WORKFLOW_BY_STAGE, "intake", "metrics"]),
            },
        )
    # Copy: handing out the module-level list itself would let a caller's
    # in-place change alter the stage table for every later call.
    return list(WORKFLOW_BY_STAGE[normalized])
def _source_state(root: Path) -> dict[str, Any]:
    """Snapshot the registered source artifacts for the generation state.

    Returns a mapping from artifact id to its path, digest, title, source
    type and chunk id; missing provenance values are stored as ``""``.
    """
    snapshot: dict[str, Any] = {}
    for artifact in load_infospace(root).artifacts:
        if artifact.kind != "source":
            continue
        provenance = artifact.provenance
        snapshot[artifact.id] = {
            "path": artifact.path,
            "digest": provenance.get("digest", ""),
            "title": artifact.title,
            "source_type": provenance.get("source_type", ""),
            "chunk_id": provenance.get("chunk_id", ""),
        }
    return snapshot
def _stale_source_ids(root: Path) -> list[str]:
    """Return the ids of source artifacts whose file no longer matches.

    A source is stale when its file is missing, or when a digest was
    recorded and the file's current text hashes to a different value. A file
    that can no longer be decoded as UTF-8 is also reported as stale rather
    than raising. Sources without a recorded digest are only checked for
    existence.
    """
    infospace = load_infospace(root)
    stale: list[str] = []
    for item in infospace.artifacts:
        if item.kind != "source":
            continue
        path = infospace.root / item.path
        if not path.is_file():
            stale.append(item.id)
            continue
        expected = str(item.provenance.get("digest") or "")
        if not expected:
            continue
        try:
            current = _digest_text(path.read_text(encoding="utf-8"))
        except UnicodeDecodeError:
            # Undecodable content cannot match the recorded text digest, and
            # status reporting must not crash on a damaged source file.
            stale.append(item.id)
            continue
        if current != expected:
            stale.append(item.id)
    return stale
def _mark_workflow_completed(
    state: dict[str, Any],
    result: WorkflowRunResult,
) -> dict[str, Any]:
    """Return a copy of ``state`` with the workflow's latest result recorded.

    The entry under ``stage_status[<workflow_id>]`` holds the run status,
    the run id, the ids of the produced artifacts and a timestamp. Neither
    ``state`` nor its ``stage_status`` mapping is modified.
    """
    entry = {
        "status": result.status,
        "run_id": result.run_id,
        "output_artifact_ids": [output.artifact_id for output in result.outputs],
        "updated_at": _now(),
    }
    updated_status = dict(state.get("stage_status") or {})
    updated_status[result.workflow_id] = entry
    new_state = dict(state)
    new_state["stage_status"] = updated_status
    return new_state
def _profile_digest(root: Path, profile: str) -> str:
    """Return a SHA-256 digest over the installed profile and its templates.

    Hashes every file below ``profiles/<profile>`` and then below
    ``workflows/templates/<profile>``, each directory in sorted path order,
    feeding the path relative to ``root`` followed by the file's bytes.
    Directories that do not exist contribute nothing.
    """
    digest = hashlib.sha256()
    profile_dirs = (
        root / "profiles" / profile,
        root / "workflows" / "templates" / profile,
    )
    for directory in profile_dirs:
        if not directory.is_dir():
            continue
        for entry in sorted(directory.rglob("*")):
            if not entry.is_file():
                continue
            relative_name = str(entry.relative_to(root))
            digest.update(relative_name.encode("utf-8"))
            digest.update(entry.read_bytes())
    return digest.hexdigest()
def _read_state(root: Path) -> dict[str, Any]:
    """Load the generation state, or ``{}`` when it is absent or not a mapping."""
    state_file = root / STATE_PATH
    if state_file.is_file():
        loaded = yaml.safe_load(state_file.read_text(encoding="utf-8"))
        if isinstance(loaded, dict):
            return loaded
    return {}
def _write_state(root: Path, state: dict[str, Any]) -> None:
    """Persist the generation state as YAML, creating directories as needed.

    Keys are written in the order they appear in ``state``.
    """
    state_file = root / STATE_PATH
    state_file.parent.mkdir(parents=True, exist_ok=True)
    serialized = yaml.safe_dump(state, sort_keys=False)
    state_file.write_text(serialized, encoding="utf-8")
def _digest_text(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``text`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
def _now() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()