generated from coulomb/repo-seed
IB-WP-0016-T07: review report and output policy; close IB-WP-0016
Enrich reports/generation-summary.md with the review-oriented sections that the 2026-05-17 smoke run flagged as missing: ## Chapter coverage (per-chapter source/entity/relation/anchor counts), ## Entities (the deduped title list), ## Unmapped source chunks (sources with no downstream generated artifact), and ## Page anchors (total plus deterministic sample). Sections are conditional on data being present so generic non-Lefevre runs stay terse. Add docs/lefevre-readiness.md as the final sign-off document for IB-WP-0016: what is wired (T01-T06 recap), an output policy table (checked-in fixture sources vs disposable generated infospaces vs archive targets), a seven-item reviewer checklist (duplicate entities, relation endpoints, weak evidence, overgeneralization, anchor coverage, unmapped sources, plan-vs-actual variance), a scale-up plan from one-chapter to full-book, and the load-bearing risks still outstanding (cross-chunk dedup, whole-run resume, adaptive routing deferred to LLM-WP-0004 / IB-WP-0018, rate-table drift). Closes IB-WP-0016 (Lefevre EPUB3 Infospace Readiness Pilot): T01-T07 all done; the workplan is set to status=done. 131 tests pass, 1 skipped (live OpenRouter smoke, correctly gated). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -730,6 +730,7 @@ def _record_metrics(root: Path) -> Any:
|
||||
|
||||
def _write_generation_report(root: Path, metrics: dict[str, Any], snapshot_id: str) -> None:
|
||||
status = status_generation(root)
|
||||
review = _collect_review_report(root)
|
||||
lines = [
|
||||
"# Generation Report",
|
||||
"",
|
||||
@@ -747,6 +748,49 @@ def _write_generation_report(root: Path, metrics: dict[str, Any], snapshot_id: s
|
||||
variance_line = _format_variance_line(status.get("budget_summary"))
|
||||
if variance_line:
|
||||
lines.extend(["## Plan variance", "", variance_line, ""])
|
||||
if review["chapter_coverage"]:
|
||||
lines.extend(["## Chapter coverage", ""])
|
||||
for row in review["chapter_coverage"]:
|
||||
label = row["chapter_label"] or "—"
|
||||
number = row["chapter_number"]
|
||||
number_text = f"{number:02d}" if isinstance(number, int) else "—"
|
||||
lines.append(
|
||||
f"- Chapter {number_text} ({label}): "
|
||||
f"{row['source_count']} source chunk(s), "
|
||||
f"{row['entity_count']} entity, "
|
||||
f"{row['relation_count']} relation, "
|
||||
f"{row['anchor_count']} page anchor"
|
||||
)
|
||||
lines.append("")
|
||||
if review["entity_titles"]:
|
||||
lines.extend(["## Entities", ""])
|
||||
for title in review["entity_titles"]:
|
||||
lines.append(f"- {title}")
|
||||
lines.append("")
|
||||
if review["unmapped_sources"]:
|
||||
lines.extend(
|
||||
[
|
||||
"## Unmapped source chunks",
|
||||
"",
|
||||
"These source chunks have no generated artifact pointing back to "
|
||||
"them. Re-run the missing stages or annotate the gap before "
|
||||
"scaling beyond the current selection.",
|
||||
"",
|
||||
]
|
||||
)
|
||||
for chunk_id in review["unmapped_sources"]:
|
||||
lines.append(f"- `{chunk_id}`")
|
||||
lines.append("")
|
||||
if review["page_anchor_total"]:
|
||||
lines.extend(
|
||||
[
|
||||
"## Page anchors",
|
||||
"",
|
||||
f"- Total distinct anchors: {review['page_anchor_total']}",
|
||||
f"- Sample: {', '.join(review['page_anchor_sample'])}",
|
||||
"",
|
||||
]
|
||||
)
|
||||
text = "\n".join(lines)
|
||||
path = root / "reports" / "generation-summary.md"
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -761,6 +805,82 @@ def _write_generation_report(root: Path, metrics: dict[str, Any], snapshot_id: s
|
||||
)
|
||||
|
||||
|
||||
def _collect_review_report(root: Path) -> dict[str, Any]:
    """Build the review-oriented payload that feeds the generation report.

    Loads the infospace at ``root``, indexes every non-source artifact by the
    source ids named in its ``generated_from`` relationships, and then walks
    the source artifacts once to aggregate per-chapter counts, collect page
    anchors, and find sources that nothing was generated from.

    Args:
        root: Infospace root directory, passed straight to ``load_infospace``.

    Returns:
        A dict with the following keys:

        * ``chapter_coverage`` -- list of row dicts (``chapter_number``,
          ``chapter_label``, ``source_count``, ``entity_count``,
          ``relation_count``, ``anchor_count``). Rows whose chapter number is
          an ``int`` come first in ascending order; the remaining rows follow,
          ordered by chapter label.
        * ``entity_titles`` -- sorted, de-duplicated titles of all entity
          artifacts that have a non-empty title.
        * ``unmapped_sources`` -- chunk ids of sources with no downstream
          generated artifact, in artifact order.
        * ``page_anchor_total`` -- number of distinct page anchors seen.
        * ``page_anchor_sample`` -- the first six distinct anchors in
          first-seen order.
    """
    infospace = load_infospace(root)
    sources = [item for item in infospace.artifacts if item.kind == "source"]
    generated = [item for item in infospace.artifacts if item.kind != "source"]

    # Map source id -> generated artifacts that declare it as their origin.
    downstream_by_source: dict[str, list[Any]] = {}
    for item in generated:
        for rel in item.relationships or []:
            if rel.get("type") != "generated_from":
                continue
            target = str(rel.get("target") or "")
            if not target:
                continue
            downstream_by_source.setdefault(target, []).append(item)

    chapter_rows: dict[Any, dict[str, Any]] = {}
    anchors: list[str] = []
    seen_anchors: set[str] = set()
    unmapped: list[str] = []
    md_suffix = ".md"

    for source in sources:
        provenance = source.provenance or {}
        chapter_number = provenance.get("chapter_number")
        chapter_label = provenance.get("chapter_label") or ""
        page_anchors = provenance.get("page_anchors") or []

        # Sources without a chapter number are grouped by label; sources with
        # neither get a row of their own, keyed by the source id.
        if chapter_number is not None:
            key = chapter_number
        else:
            key = f"_label:{chapter_label or source.id}"
        row = chapter_rows.setdefault(
            key,
            {
                "chapter_number": chapter_number,
                "chapter_label": chapter_label,
                "source_count": 0,
                "entity_count": 0,
                "relation_count": 0,
                "anchor_count": 0,
            },
        )
        row["source_count"] += 1
        # Per-chapter anchor counts are raw totals; only the global anchor
        # list below is de-duplicated.
        row["anchor_count"] += len(page_anchors)

        downstream = downstream_by_source.get(source.id, [])
        if not downstream:
            chunk_id = provenance.get("chunk_id")
            if not chunk_id:
                # Fall back to the id without its first path segment and
                # without a trailing ".md". Only a real suffix is stripped, so
                # names that merely contain ".md" (e.g. "intro.mdx") stay
                # intact instead of being truncated.
                chunk_id = source.id.split("/", 1)[-1]
                if chunk_id.endswith(md_suffix):
                    chunk_id = chunk_id[: -len(md_suffix)]
            unmapped.append(chunk_id)

        for item in downstream:
            if item.kind == "entity":
                row["entity_count"] += 1
            elif item.kind == "relation":
                row["relation_count"] += 1

        for anchor in page_anchors:
            if anchor not in seen_anchors:
                seen_anchors.add(anchor)
                anchors.append(anchor)

    def _sort_key(item: tuple[Any, dict[str, Any]]) -> tuple[int, int, str]:
        # Integer-numbered chapters sort first by number; everything else
        # sorts afterwards by label.
        row = item[1]
        number = row.get("chapter_number")
        if isinstance(number, int):
            return (0, number, "")
        return (1, 0, row.get("chapter_label") or "")

    chapter_coverage = [row for _key, row in sorted(chapter_rows.items(), key=_sort_key)]
    entity_titles = sorted(
        {item.title for item in infospace.artifacts if item.kind == "entity" and item.title}
    )
    return {
        "chapter_coverage": chapter_coverage,
        "entity_titles": entity_titles,
        "unmapped_sources": unmapped,
        "page_anchor_total": len(anchors),
        "page_anchor_sample": anchors[:6],
    }
|
||||
|
||||
|
||||
def _workflow_ids_for_stage(stage: str) -> list[str]:
|
||||
normalized = stage.strip().lower()
|
||||
if normalized == "intake":
|
||||
|
||||
Reference in New Issue
Block a user