generated from coulomb/repo-seed
IB-WP-0019-T04: plan-vs-actual variance and surfacing
After every generate run, compute variance between the executing plan
snapshot and the just-recorded usage rollup, persist it to
output/budget/summary.yaml (overwrite-on-run), and surface it both in
the generate status JSON (new budget_summary field) and as a "Plan
variance" section (heading plus a one-line summary) in
reports/generation-summary.md. The section is omitted when there is
nothing to report.
Variance fields: calls / prompt_tokens / total_tokens each carry
{estimated, actual, delta, ratio}; cost_usd carries {estimated,
actual_known, actual_estimated_from_rates, actual_total, delta, ratio};
per_workflow rolls the per-bucket usage up to the same workflow_id grain
the plan reports. Runs whose snapshot_id cannot be resolved (no prior
plan, or pruned from the retention window) still record a variance row
with null comparison fields and snapshot_resolved=false, so the
consumer always sees a current summary.
Reordered run_generation so usage and variance are written before the
generation report, allowing the report to embed the variance line on
the same pass.
110 tests pass.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -21,8 +21,10 @@ from .openrouter import OpenRouterAssistedGenerationAdapter
|
||||
from .budget import (
|
||||
latest_plan_snapshot_id,
|
||||
make_cost_resolver,
|
||||
read_run_variance,
|
||||
record_plan_snapshot,
|
||||
record_run_usage,
|
||||
record_run_variance,
|
||||
)
|
||||
from .source_intake import SourceChunk, normalize_source
|
||||
from .workflow import (
|
||||
@@ -325,6 +327,31 @@ def _read_profile_name(root: Path) -> str:
|
||||
return str(state.get("profile") or DEFAULT_PROFILE)
|
||||
|
||||
|
||||
def _format_variance_line(summary: dict[str, Any] | None) -> str:
|
||||
if not summary:
|
||||
return ""
|
||||
calls = summary.get("calls") or {}
|
||||
cost = summary.get("cost_usd") or {}
|
||||
parts: list[str] = []
|
||||
calls_actual = calls.get("actual")
|
||||
calls_estimated = calls.get("estimated")
|
||||
if calls_actual is not None:
|
||||
if calls_estimated is not None:
|
||||
parts.append(f"calls {calls_actual}/{calls_estimated}")
|
||||
else:
|
||||
parts.append(f"calls {calls_actual} (no plan)")
|
||||
actual_cost = cost.get("actual_total")
|
||||
estimated_cost = cost.get("estimated")
|
||||
if actual_cost is not None:
|
||||
if estimated_cost is not None:
|
||||
parts.append(f"cost ${actual_cost:.4f}/${estimated_cost:.4f}")
|
||||
elif actual_cost > 0:
|
||||
parts.append(f"cost ${actual_cost:.4f}")
|
||||
if not parts:
|
||||
return ""
|
||||
return "- " + " · ".join(parts)
|
||||
|
||||
|
||||
def _workspace_for(root: Path) -> Path:
|
||||
"""Resolve the workspace directory that contains this infospace.
|
||||
|
||||
@@ -373,6 +400,18 @@ def run_generation(
|
||||
workflow_results.append(result.to_dict())
|
||||
state = _mark_workflow_completed(state, result)
|
||||
|
||||
if workflow_results:
|
||||
duration_seconds = round(_monotonic() - monotonic_start, 3)
|
||||
usage_entry = record_run_usage(
|
||||
root_path,
|
||||
workflow_results,
|
||||
snapshot_id=latest_plan_snapshot_id(root_path),
|
||||
duration_seconds=duration_seconds,
|
||||
started_at=started_wall.isoformat(),
|
||||
cost_resolver=make_cost_resolver(_workspace_for(root_path)),
|
||||
)
|
||||
record_run_variance(root_path, usage_entry)
|
||||
|
||||
metrics: dict[str, Any] = {}
|
||||
snapshot_id = ""
|
||||
if stage_key in {"all", "metrics"}:
|
||||
@@ -398,16 +437,6 @@ def run_generation(
|
||||
}
|
||||
)
|
||||
_write_state(root_path, state)
|
||||
if workflow_results:
|
||||
duration_seconds = round(_monotonic() - monotonic_start, 3)
|
||||
record_run_usage(
|
||||
root_path,
|
||||
workflow_results,
|
||||
snapshot_id=latest_plan_snapshot_id(root_path),
|
||||
duration_seconds=duration_seconds,
|
||||
started_at=started_wall.isoformat(),
|
||||
cost_resolver=make_cost_resolver(_workspace_for(root_path)),
|
||||
)
|
||||
return GenerationRunResult(
|
||||
root=str(root_path),
|
||||
status="completed",
|
||||
@@ -449,6 +478,7 @@ def status_generation(root: str | Path) -> dict[str, Any]:
|
||||
"stale_profile": stale_profile,
|
||||
"completed": bool(state.get("completed", False)),
|
||||
"stage_status": state.get("stage_status", {}),
|
||||
"budget_summary": read_run_variance(infospace.root),
|
||||
}
|
||||
|
||||
|
||||
@@ -636,22 +666,24 @@ def _record_metrics(root: Path) -> Any:
|
||||
|
||||
def _write_generation_report(root: Path, metrics: dict[str, Any], snapshot_id: str) -> None:
    """Write the markdown generation report to reports/generation-summary.md.

    The report lists the snapshot id, the source/entity/relation/evaluation
    counts taken from ``status_generation(root)``, the metrics sorted by
    name, and, when a variance line can be formatted from the status
    ``budget_summary`` field, a trailing "Plan variance" section.

    Args:
        root: Directory under which ``reports/`` is created if missing.
        metrics: Metric name to value mapping, rendered one bullet per entry.
        snapshot_id: Identifier printed on the "Snapshot:" line.
    """
    status = status_generation(root)
    lines = [
        "# Generation Report",
        "",
        f"Snapshot: {snapshot_id}",
        f"Sources: {status['source_chunk_count']}",
        f"Entities: {status['entity_count']}",
        f"Relations: {status['relation_count']}",
        f"Evaluations: {status['evaluation_count']}",
        "",
        "## Metrics",
        "",
        *[f"- {name}: {value}" for name, value in sorted(metrics.items())],
        "",
    ]

    # The variance section is optional: skip it when there is nothing to show.
    variance_line = _format_variance_line(status.get("budget_summary"))
    if variance_line:
        lines.extend(["## Plan variance", "", variance_line, ""])

    # Build the text once, after every section has been appended.
    text = "\n".join(lines)
    path = root / "reports" / "generation-summary.md"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(text, encoding="utf-8")
|
||||
|
||||
Reference in New Issue
Block a user