INFRA-TASKS #5 — process_chapters.py now skips writing *-prompt.md files when the corresponding output file already exists on disk. DB-only rebuilds no longer dirty the working tree with unchanged prompt content. INFRA-TASKS #8 — Added '## Quality Metrics' section to the entity and VSM mapping schemas, defining the five evaluation dimensions (Definition Precision, Source Grounding, Domain Placement, VSM Relevance, Explanatory Value) with 1–5 rubrics used by the evaluate-entity template. Also updated INFRA-TASKS.md to reflect current resolution status for tasks 4–19 across S2 and S3. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1208 lines
49 KiB
Python
1208 lines
49 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Infospace with History — Chapter Processing Pipeline
|
|
|
|
Processes chapters from Adam Smith's "The Wealth of Nations" through a
|
|
three-stage analysis pipeline, mapping economic content to Stafford Beer's
|
|
Viable System Model.
|
|
|
|
Pipeline per chapter:
|
|
1. extract-entities — Extract economic entities from chapter text
|
|
2. map-to-vsm — Map entities to VSM concepts
|
|
3. synthesize-analysis — Produce chapter-level VSM analysis
|
|
|
|
After all chapters:
|
|
4. assess-metrics — Evaluate completeness and consistency
|
|
|
|
Usage:
|
|
# Process a single chapter
|
|
python process_chapters.py --chapter book-1-chapter-01
|
|
|
|
# Process all chapters in Book I
|
|
python process_chapters.py --book 1
|
|
|
|
# Process all chapters
|
|
python process_chapters.py --all
|
|
|
|
# Assess metrics only (after chapters have been processed)
|
|
python process_chapters.py --metrics
|
|
|
|
# List available chapters
|
|
python process_chapters.py --list
|
|
"""
|
|
|
|
import argparse
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Add project root to path
|
|
project_root = Path(__file__).parent.parent.parent
|
|
sys.path.insert(0, str(project_root))
|
|
|
|
from markitect.prompts.models import Artifact, ArtifactType
|
|
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
|
|
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
|
|
from markitect.prompts.services.artifact_service import ArtifactService
|
|
from markitect.prompts.templates.models import PromptTemplate
|
|
from markitect.prompts.templates.analyzer import TemplateAnalyzer
|
|
from markitect.prompts.resolver.resolver import PromptResolver
|
|
from markitect.prompts.resolver.compiler import ContextCompiler
|
|
from markitect.prompts.resolver.strategy import ResolutionConfig, MultiSpaceResolutionStrategy
|
|
from markitect.prompts.execution.manifest import RunManifest
|
|
from markitect.prompts.dependencies.graph import GraphBuilder
|
|
from markitect.prompts.traceability.service import TraceabilityService
|
|
from markitect.prompts.queries.operations import PromptQueryService
|
|
|
|
|
|
class ChapterProcessor:
|
|
"""Processes Wealth of Nations chapters through the VSM analysis pipeline."""
|
|
|
|
def __init__(
    self,
    example_dir: Path,
    db_path: Optional[str] = None,
    llm_adapter=None,
):
    """Build the repositories, services and space-name table for one example.

    Args:
        example_dir: Root directory of the example; inputs and outputs are
            resolved relative to it by the other methods.
        db_path: SQLite database path. When falsy, ``infospace.db`` inside
            *example_dir* is used.
        llm_adapter: Optional adapter used for LLM calls; stored unchanged.
    """
    self.example_dir = example_dir
    self.db_path = db_path if db_path else str(example_dir / "infospace.db")
    self.llm_adapter = llm_adapter

    # Persistence layer: both repositories share the same database path.
    self.artifact_repo = SQLiteArtifactRepository(self.db_path)
    self.dep_repo = SQLiteDependencyRepository(self.db_path)

    # Services layered on top of the repositories.
    self.artifact_service = ArtifactService(self.artifact_repo)
    self.graph_builder = GraphBuilder(self.dep_repo)
    repo_args = (self.artifact_repo, self.dep_repo)
    self.trace_service = TraceabilityService(*repo_args, db_path=self.db_path)
    self.query_service = PromptQueryService(*repo_args, db_path=self.db_path)

    # Template analysis and compilation helpers.
    self.analyzer = TemplateAnalyzer()
    self.compiler = ContextCompiler()

    # Short key -> information-space id; every id is the key prefixed
    # with "infospace-".
    space_keys = (
        "templates",
        "sources",
        "guidelines",
        "vsm-reference",
        "entities",
        "mappings",
        "analyses",
        "metrics",
    )
    self.spaces = {key: f"infospace-{key}" for key in space_keys}
|
|
|
|
# ── Artifact Management ──────────────────────────────────────────
|
|
|
|
def load_or_create_artifact(
    self,
    space: str,
    filepath: Path,
    artifact_type: ArtifactType,
    name: Optional[str] = None,
) -> Artifact:
    """Return the artifact called *name* in *space*, creating it from a file if absent.

    Args:
        space: Id of the information space to look in / create in.
        filepath: File whose text becomes the artifact content.
        artifact_type: Type assigned to a newly created artifact.
        name: Artifact name; defaults to the file's stem.

    Returns:
        The already-stored artifact when one with that name exists (its
        content is not compared with the file), otherwise the new artifact.
    """
    artifact_name = filepath.stem if name is None else name

    # The file is read before the lookup, so an unreadable file raises even
    # when the artifact is already stored.
    file_text = filepath.read_text()

    stored = self.artifact_repo.get_by_name(space, artifact_name)
    if stored:
        return stored

    created = self.artifact_repo.create(
        Artifact.create(
            space_id=space,
            name=artifact_name,
            content=file_text,
            artifact_type=artifact_type,
        )
    )
    print(f" + {artifact_name} ({created.content_digest[:8]})")
    return created
|
|
|
|
def store_output_artifact(
    self, space: str, name: str, content: str, artifact_type: ArtifactType
) -> Artifact:
    """Store generated content under *name*, replacing any previous artifact.

    An existing artifact with the same name in *space* is deleted first and
    a fresh one is created from *content*.

    Returns:
        The artifact object returned by the repository's ``create``.
    """
    previous = self.artifact_repo.get_by_name(space, name)
    if previous:
        self.artifact_repo.delete(previous.id)

    replacement = Artifact.create(
        space_id=space, name=name, content=content, artifact_type=artifact_type
    )
    return self.artifact_repo.create(replacement)
|
|
|
|
def bind_macro_artifact(self, space: str, macro_name: str, content: str) -> Artifact:
    """Bind *content* to *macro_name* in *space* so templates can resolve it.

    Any artifact already stored under that name is replaced. The binding is
    always stored with ``ArtifactType.CONTENT``.
    """
    # Same delete-then-create sequence as store_output_artifact, with the
    # artifact type fixed to CONTENT.
    return self.store_output_artifact(space, macro_name, content, ArtifactType.CONTENT)
|
|
|
|
# ── Setup ────────────────────────────────────────────────────────
|
|
|
|
def setup(self):
    """Register the static inputs: templates, VSM reference and guidelines.

    Each ``*.md`` file found in the three input directories is passed to
    ``load_or_create_artifact``, which leaves already-stored artifacts as
    they are.
    """
    print("Loading artifacts...")
    artifacts_root = self.example_dir / "artifacts"

    # Templates are named after their file stem.
    for template_path in (self.example_dir / "templates").glob("*.md"):
        self.load_or_create_artifact(
            self.spaces["templates"], template_path, ArtifactType.TEMPLATE
        )

    # NOTE(review): every reference file is registered under the single name
    # "vsm_framework", so with several files only the first one globbed is
    # stored — confirm that exactly one file is expected here.
    for reference_path in (artifacts_root / "vsm-reference").glob("*.md"):
        self.load_or_create_artifact(
            self.spaces["vsm-reference"],
            reference_path,
            ArtifactType.CONTENT,
            name="vsm_framework",
        )

    # Known guideline files get fixed macro names; any other file falls back
    # to its stem.
    macro_names = {
        "extraction-rules.md": "extraction_rules",
        "mapping-rules.md": "mapping_rules",
    }
    for guideline_path in (artifacts_root / "guidelines").glob("*.md"):
        macro_name = macro_names.get(guideline_path.name, guideline_path.stem)
        self.load_or_create_artifact(
            self.spaces["guidelines"],
            guideline_path,
            ArtifactType.CONTENT,
            name=macro_name,
        )

    print(" Done.\n")
|
|
|
|
# ── Template Resolution ──────────────────────────────────────────
|
|
|
|
def resolve_and_compile(
    self, template_name: str, extra_spaces: list[str]
) -> Optional[str]:
    """Turn a stored template into a final prompt string.

    The template is looked up in the templates space, analysed, resolved
    against the templates space plus *extra_spaces*, and compiled.

    Args:
        template_name: Name of the template artifact.
        extra_spaces: Keys of ``self.spaces`` to include during resolution.

    Returns:
        The compiled prompt text, or ``None`` (after printing an error) when
        the template is missing or resolution fails.
    """
    templates_space = self.spaces["templates"]
    template_artifact = self.artifact_repo.get_by_name(templates_space, template_name)
    if not template_artifact:
        print(f" ERROR: Template '{template_name}' not found")
        return None

    template = PromptTemplate.from_artifact(template_artifact)
    raw_template = template_artifact.content

    # Analysis runs before resolution, as in the original flow.
    self.analyzer.analyze(template, raw_template)

    included = [self.spaces[key] for key in extra_spaces]
    config = ResolutionConfig(space_id=templates_space, included_spaces=included)
    resolver = PromptResolver(self.artifact_service, MultiSpaceResolutionStrategy())
    resolution = resolver.resolve_template(template, config)
    if not resolution.success:
        print(f" ERROR: Resolution failed: {resolution.context.errors}")
        return None

    return self.compiler.compile(template, raw_template, resolution).content
|
|
|
|
# ── LLM Execution Helpers ─────────────────────────────────────────
|
|
|
|
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
    """Call the LLM and return the content string, or ``None`` on failure.

    ``LLMRateLimitError`` is retried up to 3 times with a linearly growing
    pause (15 s, 30 s, 45 s). Any other exception aborts immediately.
    Does **not** write any files — callers decide where to persist.

    Args:
        prompt: Fully compiled prompt text.
        stage_label: Short label used only in progress messages.
        max_tokens: Passed to ``RunConfig(max_tokens=...)``.

    Returns:
        The response content, or ``None`` when the call failed, the retries
        were exhausted, or the content was empty/whitespace.
    """
    import time as _time
    from markitect.prompts.execution.models import RunConfig
    from markitect.llm.exceptions import LLMRateLimitError

    print(f" Calling LLM ({stage_label})...")
    t0 = _time.time()
    max_retries = 3
    response = None
    for attempt in range(max_retries + 1):
        try:
            response = self.llm_adapter.execute_prompt(prompt, RunConfig(max_tokens=max_tokens))
            break  # success
        except LLMRateLimitError as exc:
            if attempt >= max_retries:
                print(f" LLM rate limit after {max_retries} retries ({_time.time() - t0:.1f}s): {exc}")
                return None
            wait = 15 * (attempt + 1)  # 15, 30, 45 seconds
            print(f" Rate limited, retrying in {wait}s (attempt {attempt + 1}/{max_retries})...")
            _time.sleep(wait)
        except Exception as exc:
            # Best-effort boundary: report and let the caller pause the pipeline.
            print(f" LLM error ({_time.time() - t0:.1f}s): {exc}")
            return None

    elapsed = _time.time() - t0
    # Token statistics are informational only; a response without usage data
    # must not turn a successful call into a crash.
    usage = getattr(response, "usage", None) or {}
    print(
        f" LLM done in {elapsed:.1f}s — "
        f"prompt {usage.get('prompt_tokens', '?')} tok, "
        f"completion {usage.get('completion_tokens', '?')} tok, "
        f"total {usage.get('total_tokens', '?')} tok"
    )

    content = response.content
    if not content or not content.strip():
        print(" LLM returned empty content")
        return None

    return content
|
|
|
|
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
|
|
"""Call the LLM, write the result to *output_file*, and return it."""
|
|
content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
|
|
if content:
|
|
output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
output_file.write_text(content)
|
|
print(f" LLM output written to {output_file.name}")
|
|
return content
|
|
|
|
# ── Entity Management (flat canonical set) ─────────────────────
|
|
|
|
@staticmethod
|
|
def _normalize_entity_name(name: str) -> str:
|
|
"""Normalize an entity name to a kebab-case filename stem."""
|
|
slug = name.lower().strip()
|
|
slug = slug.replace("_", "-").replace(" ", "-")
|
|
slug = re.sub(r"[^a-z0-9-]", "", slug)
|
|
slug = re.sub(r"-{2,}", "-", slug)
|
|
return slug.strip("-")
|
|
|
|
def _entities_dir(self) -> Path:
|
|
return self.example_dir / "output" / "entities"
|
|
|
|
def _archive_dir(self) -> Path:
|
|
return self._entities_dir() / "archive"
|
|
|
|
def _list_existing_entity_names(self) -> list[str]:
|
|
"""Return sorted slugs of all canonical entity files already on disk."""
|
|
return sorted(
|
|
f.stem
|
|
for f in self._entities_dir().glob("*.md")
|
|
if not f.name.endswith("-entities.md")
|
|
and not f.name.endswith("-prompt.md")
|
|
)
|
|
|
|
def archive_entity(self, slug: str, reason: str) -> None:
|
|
"""Move a canonical entity to the archive with a documented reason.
|
|
|
|
The entity file is prepended with an archive header explaining why
|
|
it was retired, then moved to ``output/entities/archive/<slug>.md``.
|
|
Chapter views that reference this entity are **not** updated
|
|
automatically — review and update them manually.
|
|
"""
|
|
src = self._entities_dir() / f"{slug}.md"
|
|
if not src.exists():
|
|
print(f" Entity not found: {slug}")
|
|
return
|
|
|
|
archive = self._archive_dir()
|
|
archive.mkdir(parents=True, exist_ok=True)
|
|
dest = archive / f"{slug}.md"
|
|
|
|
from datetime import date
|
|
header = (
|
|
f"<!-- ARCHIVED {date.today().isoformat()}\n"
|
|
f" Reason: {reason}\n"
|
|
f"-->\n\n"
|
|
)
|
|
content = src.read_text()
|
|
dest.write_text(header + content)
|
|
src.unlink()
|
|
|
|
# Report which chapter views still reference this entity
|
|
refs = []
|
|
for view in self._entities_dir().glob("*-entities.md"):
|
|
if f'include "{slug}.md"' in view.read_text():
|
|
refs.append(view.name)
|
|
|
|
print(f" Archived: {slug}.md -> archive/{slug}.md")
|
|
print(f" Reason: {reason}")
|
|
if refs:
|
|
print(f" Referenced by: {', '.join(refs)} (update these views)")
|
|
print(f" Canonical set: {len(self._list_existing_entity_names())} entities")
|
|
|
|
def _split_entities(
|
|
self, combined_content: str
|
|
) -> list[tuple[str, Path]]:
|
|
"""Split combined LLM output into the flat canonical entity directory.
|
|
|
|
Writes each entity to ``output/entities/<slug>.md``. If a file
|
|
with that slug already exists it is **skipped** (first-occurrence
|
|
wins), but the entity is still included in the returned list so
|
|
the chapter view can reference it.
|
|
|
|
Returns list of (entity_name, file_path) for every entity in
|
|
*combined_content* (new and pre-existing alike).
|
|
"""
|
|
entities_dir = self._entities_dir()
|
|
entities_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
parts = re.split(
|
|
r"^---\s*ENTITY:\s*(.+?)\s*---\s*$",
|
|
combined_content,
|
|
flags=re.MULTILINE,
|
|
)
|
|
|
|
entity_files: list[tuple[str, Path]] = []
|
|
new_count = 0
|
|
skipped_count = 0
|
|
|
|
for i in range(1, len(parts), 2):
|
|
entity_name = parts[i]
|
|
entity_content = parts[i + 1].strip() if i + 1 < len(parts) else ""
|
|
|
|
slug = self._normalize_entity_name(entity_name)
|
|
if not slug:
|
|
continue
|
|
|
|
file_path = entities_dir / f"{slug}.md"
|
|
if file_path.exists():
|
|
skipped_count += 1
|
|
else:
|
|
file_path.write_text(entity_content + "\n")
|
|
new_count += 1
|
|
|
|
entity_files.append((entity_name, file_path))
|
|
|
|
msg = f" {new_count} new entities written"
|
|
if skipped_count:
|
|
msg += f", {skipped_count} pre-existing (skipped)"
|
|
print(msg)
|
|
return entity_files
|
|
|
|
def _write_chapter_entity_view(
|
|
self, chapter_id: str, entity_files: list[tuple[str, Path]]
|
|
) -> Path:
|
|
"""Write a per-chapter view file that transcludes individual entities."""
|
|
parts = chapter_id.split("-")
|
|
book_num = int(parts[1]) if len(parts) >= 2 else 1
|
|
ch_num = int(parts[3]) if len(parts) >= 4 else 0
|
|
roman = {1: "I", 2: "II", 3: "III", 4: "IV", 5: "V"}.get(book_num, str(book_num))
|
|
title = f"# Economic Entities — Book {roman}, Chapter {ch_num}\n"
|
|
|
|
lines = [title]
|
|
for _name, file_path in entity_files:
|
|
lines.append(f'{{{{ include "{file_path.name}" }}}}')
|
|
lines.append("")
|
|
lines.append("---")
|
|
lines.append("")
|
|
|
|
# Remove trailing separator after last entity
|
|
if lines and lines[-1] == "" and len(lines) >= 3 and lines[-2] == "---":
|
|
lines = lines[:-2]
|
|
|
|
view_path = self._entities_dir() / f"{chapter_id}-entities.md"
|
|
view_path.write_text("\n".join(lines) + "\n")
|
|
print(f" Chapter view written to {view_path.name}")
|
|
return view_path
|
|
|
|
def _read_entities_from_view(
    self, chapter_id: str
) -> tuple[str, list[tuple[str, Path]]]:
    """Rebuild the delimited entity text for a chapter from its view file.

    The include targets of ``<chapter_id>-entities.md`` are obtained via
    ``DirectiveParser.extract_file_includes`` and resolved relative to the
    entities directory. Targets that do not exist on disk are skipped.

    Returns:
        ``(combined, entity_files)``: *combined* joins one
        ``--- ENTITY: <slug> ---`` section per entity (empty string when
        there are none); *entity_files* lists ``(slug, path)`` in view order.
    """
    from markitect.packaging.transclusion.directives import DirectiveParser

    entities_dir = self._entities_dir()
    view_text = (entities_dir / f"{chapter_id}-entities.md").read_text()

    found: list[tuple[str, Path]] = []
    sections: list[str] = []
    for include_target in DirectiveParser.extract_file_includes(view_text):
        entity_path = entities_dir / include_target
        if entity_path.exists():
            slug = entity_path.stem
            entity_body = entity_path.read_text().strip()
            sections.append(f"--- ENTITY: {slug} ---\n\n{entity_body}")
            found.append((slug, entity_path))

    if not sections:
        return "", found
    return "\n\n".join(sections) + "\n", found
|
|
|
|
# ── Pipeline Stages ──────────────────────────────────────────────
|
|
|
|
def stage_extract_entities(self, chapter_id: str, chapter_content: str) -> Optional[str]:
    """Stage 1: obtain the entity text for a chapter.

    Canonical entities live in the flat ``output/entities/`` directory; the
    chapter's ``<chapter_id>-entities.md`` is a view that includes them.
    Sources are tried in this order:

    1. an existing view file containing ``{{ include`` directives;
    2. a legacy per-chapter subdirectory, which is migrated to the flat layout;
    3. a legacy combined file containing ``--- ENTITY:`` delimiters, which is
       split and replaced by a view;
    4. a fresh LLM call, when an adapter is configured.

    The compiled prompt is saved next to the outputs unless case 1 applies.

    Returns:
        The combined entity text, or ``None`` when the prompt could not be
        compiled or no output is available yet.
    """
    print(" [1/3] Extracting entities...")

    entities_space = self.spaces["entities"]
    artifact_name = f"{chapter_id}-entities"

    def _store(text: str) -> None:
        # Every branch records its result under the same artifact name.
        self.store_output_artifact(
            entities_space, artifact_name, text, ArtifactType.GENERATED
        )

    # Macro bindings used by the extract-entities template.
    self.bind_macro_artifact(self.spaces["sources"], "chapter_text", chapter_content)
    known = self._list_existing_entity_names()
    entity_list = (
        "\n".join(f"- {name}" for name in known)
        if known
        else "(none — this is the first chapter)"
    )
    self.bind_macro_artifact(entities_space, "existing_entities", entity_list)

    prompt = self.resolve_and_compile(
        "extract-entities",
        ["sources", "guidelines", "vsm-reference", "entities"],
    )
    if not prompt:
        return None

    entities_dir = self._entities_dir()
    view_file = entities_dir / f"{chapter_id}-entities.md"
    has_view = view_file.exists() and "{{ include" in view_file.read_text()

    # ── PRIMARY: chapter view with transclusion already on disk ──
    if has_view:
        content, entity_files = self._read_entities_from_view(chapter_id)
        _store(content)
        print(f" Found chapter view referencing {len(entity_files)} entities")
        return content

    # No usable view yet: save the compiled prompt (skipped above so that
    # DB-only rebuilds leave the working tree untouched).
    prompt_file = entities_dir / f"{chapter_id}-prompt.md"
    prompt_file.parent.mkdir(parents=True, exist_ok=True)
    prompt_file.write_text(prompt)
    print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")

    # ── MIGRATION: per-chapter subdirectory (previous format) ──
    legacy_subdir = entities_dir / chapter_id
    if legacy_subdir.is_dir() and any(legacy_subdir.glob("*.md")):
        print(f" Migrating per-chapter subdir: {chapter_id}/")
        migrated: list[tuple[str, Path]] = []
        for src in sorted(legacy_subdir.glob("*.md")):
            dest = entities_dir / src.name
            if not dest.exists():
                src.rename(dest)
            # Files whose slug already exists stay behind but are still
            # referenced through the canonical copy.
            migrated.append((src.stem, dest))
        if not any(legacy_subdir.glob("*")):
            legacy_subdir.rmdir()
        self._write_chapter_entity_view(chapter_id, migrated)
        content = self._read_entities_from_view(chapter_id)[0]
        _store(content)
        return content

    # ── MIGRATION: legacy combined file (pre-split format) ──
    if view_file.exists():
        raw = view_file.read_text()
        if "--- ENTITY:" in raw:
            print(f" Migrating legacy combined file: {view_file.name}")
            self._write_chapter_entity_view(chapter_id, self._split_entities(raw))
            _store(raw)
            return raw

    # ── GENERATE: call LLM, persist individual files first ──
    if self.llm_adapter and prompt:
        combined = self._call_llm(prompt, "entities")
        if combined:
            self._write_chapter_entity_view(chapter_id, self._split_entities(combined))
            _store(combined)
            return combined

    print(" Awaiting entity files in: output/entities/")
    return None
|
|
|
|
def stage_map_to_vsm(self, chapter_id: str, entities_content: str) -> Optional[str]:
    """Stage 2: obtain the entity-to-VSM mappings for a chapter.

    An existing ``output/mappings/<chapter_id>-mappings.md`` is reused and
    re-registered as an artifact. Otherwise the compiled prompt is saved and,
    when an LLM adapter is configured, the mappings are generated.

    Returns:
        The mappings text, or ``None`` when the prompt could not be compiled
        or no output is available yet.
    """
    print(" [2/3] Mapping to VSM...")

    self.bind_macro_artifact(self.spaces["entities"], "entities", entities_content)
    prompt = self.resolve_and_compile(
        "map-to-vsm", ["entities", "vsm-reference", "guidelines"]
    )
    if not prompt:
        return None

    mappings_dir = self.example_dir / "output" / "mappings"
    output_file = mappings_dir / f"{chapter_id}-mappings.md"
    artifact_name = f"{chapter_id}-mappings"

    if output_file.exists():
        content = output_file.read_text()
        self.store_output_artifact(
            self.spaces["mappings"], artifact_name, content, ArtifactType.GENERATED
        )
        print(f" Found existing output: {output_file.name}")
        return content

    # The prompt is only saved while the output is still missing, so
    # rebuilds with existing outputs do not rewrite it.
    prompt_file = mappings_dir / f"{chapter_id}-prompt.md"
    prompt_file.parent.mkdir(parents=True, exist_ok=True)
    prompt_file.write_text(prompt)
    print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")

    if self.llm_adapter and prompt:
        content = self._execute_llm(prompt, output_file, "mappings")
        if content:
            self.store_output_artifact(
                self.spaces["mappings"], artifact_name, content, ArtifactType.GENERATED
            )
            return content

    print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
    return None
|
|
|
|
def stage_synthesize_analysis(
    self, chapter_id: str, chapter_content: str, entities_content: str, mappings_content: str
) -> Optional[str]:
    """Stage 3: obtain the chapter-level VSM analysis.

    An existing ``output/analyses/<chapter_id>-analysis.md`` is reused and
    re-registered as an artifact. Otherwise the compiled prompt is saved and,
    when an LLM adapter is configured, the analysis is generated.

    Returns:
        The analysis text, or ``None`` when the prompt could not be compiled
        or no output is available yet.
    """
    print(" [3/3] Synthesizing analysis...")

    # Bind the three inputs the synthesize-analysis template refers to.
    bindings = (
        ("sources", "chapter_text", chapter_content),
        ("entities", "entities", entities_content),
        ("mappings", "mappings", mappings_content),
    )
    for space_key, macro_name, macro_value in bindings:
        self.bind_macro_artifact(self.spaces[space_key], macro_name, macro_value)

    prompt = self.resolve_and_compile(
        "synthesize-analysis",
        ["sources", "entities", "mappings", "vsm-reference"],
    )
    if not prompt:
        return None

    analyses_dir = self.example_dir / "output" / "analyses"
    output_file = analyses_dir / f"{chapter_id}-analysis.md"
    artifact_name = f"{chapter_id}-analysis"

    if output_file.exists():
        content = output_file.read_text()
        self.store_output_artifact(
            self.spaces["analyses"], artifact_name, content, ArtifactType.GENERATED
        )
        print(f" Found existing output: {output_file.name}")
        return content

    # The prompt is only saved while the output is still missing.
    prompt_file = analyses_dir / f"{chapter_id}-prompt.md"
    prompt_file.parent.mkdir(parents=True, exist_ok=True)
    prompt_file.write_text(prompt)
    print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")

    if self.llm_adapter and prompt:
        content = self._execute_llm(prompt, output_file, "analysis")
        if content:
            self.store_output_artifact(
                self.spaces["analyses"], artifact_name, content, ArtifactType.GENERATED
            )
            return content

    print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
    return None
|
|
|
|
# ── Metrics ──────────────────────────────────────────────────────
|
|
|
|
def assess_metrics(self) -> Optional[str]:
    """Produce the metrics report across all completed chapter analyses.

    All ``output/analyses/*-analysis.md`` files are concatenated (each
    prefixed with a source comment) and bound as ``all_analyses``. An
    existing ``output/metrics/metrics-report.md`` is reused; otherwise the
    compiled prompt is saved and, when an LLM adapter is configured, the
    report is generated.

    Returns:
        The report text, or ``None`` when there are no analyses, the prompt
        could not be compiled, or no output is available yet.
    """
    print("Assessing metrics...")

    analyses_dir = self.example_dir / "output" / "analyses"
    analysis_files = sorted(analyses_dir.glob("*-analysis.md"))
    if not analysis_files:
        print(" No completed analyses found. Process chapters first.")
        return None

    # One block per analysis, tagged with its filename, separated by rules.
    combined = "\n\n---\n\n".join(
        f"<!-- Source: {path.name} -->\n{path.read_text()}" for path in analysis_files
    )
    self.bind_macro_artifact(self.spaces["analyses"], "all_analyses", combined)

    prompt = self.resolve_and_compile(
        "assess-metrics", ["analyses", "vsm-reference"]
    )
    if not prompt:
        return None

    metrics_dir = self.example_dir / "output" / "metrics"
    output_file = metrics_dir / "metrics-report.md"

    if output_file.exists():
        content = output_file.read_text()
        self.store_output_artifact(
            self.spaces["metrics"], "metrics-report", content, ArtifactType.GENERATED
        )
        print(f" Found existing output: {output_file.name}")
        return content

    # The prompt is only saved while the report is still missing.
    prompt_file = metrics_dir / "metrics-prompt.md"
    prompt_file.parent.mkdir(parents=True, exist_ok=True)
    prompt_file.write_text(prompt)
    print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")

    if self.llm_adapter and prompt:
        content = self._execute_llm(prompt, output_file, "metrics")
        if content:
            self.store_output_artifact(
                self.spaces["metrics"], "metrics-report", content, ArtifactType.GENERATED
            )
            return content

    print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
    return None
|
|
|
|
# ── Entity Evaluation (Task 9) ────────────────────────────────────
|
|
|
|
def _extract_quality_rubric(self) -> str:
|
|
"""Extract the Quality Metrics section from the entity schema file."""
|
|
schema_file = self.example_dir / "schemas" / "economic-entity-schema-v1.0.md"
|
|
text = schema_file.read_text()
|
|
# Find the ## Quality Metrics section up to the next ## section
|
|
import re as _re
|
|
m = _re.search(
|
|
r"^## Quality Metrics\n(.*?)^## ",
|
|
text,
|
|
flags=_re.MULTILINE | _re.DOTALL,
|
|
)
|
|
if m:
|
|
return ("## Quality Metrics\n" + m.group(1)).strip()
|
|
return text # fallback: whole schema
|
|
|
|
def _extract_source_chapter_from_entity(self, entity_text: str) -> str:
|
|
"""Extract the Source Chapter field from an entity markdown file."""
|
|
import re as _re
|
|
m = _re.search(
|
|
r"^## Source Chapter\s*\n+(.+?)(?:\n\n|\n##|\Z)",
|
|
entity_text,
|
|
flags=_re.MULTILINE | _re.DOTALL,
|
|
)
|
|
if m:
|
|
return m.group(1).strip()
|
|
return "Unknown chapter"
|
|
|
|
def evaluate_entities(self, chapter_id: Optional[str] = None) -> None:
    """Evaluate canonical entities with the ``evaluate-entity`` template.

    With *chapter_id*, only the entities referenced by that chapter's view
    file are evaluated; without it, every canonical entity on disk is.

    Results go to ``output/evaluations/<slug>-eval.md``. Entities that
    already have an evaluation file are skipped. Without an LLM adapter only
    the prompt files (``<slug>-eval-prompt.md``) are written.
    """
    evaluations_dir = self.example_dir / "output" / "evaluations"
    evaluations_dir.mkdir(parents=True, exist_ok=True)

    # Select the entity files to work on.
    if chapter_id:
        chapter_view = self._entities_dir() / f"{chapter_id}-entities.md"
        if not chapter_view.exists():
            print(f" No chapter view found for {chapter_id}")
            return
        entity_files = self._read_entities_from_view(chapter_id)[1]
        if not entity_files:
            print(f" No entities found for chapter {chapter_id}")
            return
        print(f"Evaluating {len(entity_files)} entities from {chapter_id}...")
    else:
        entity_files = [
            (name, self._entities_dir() / f"{name}.md")
            for name in self._list_existing_entity_names()
        ]
        print(f"Evaluating {len(entity_files)} canonical entities...")

    if not entity_files:
        print(" No entities to evaluate.")
        return

    # The rubric is identical for every entity, so bind it once.
    quality_rubric = self._extract_quality_rubric()
    self.bind_macro_artifact(self.spaces["guidelines"], "quality_rubric", quality_rubric)

    completed = 0
    already_evaluated = 0
    errors = 0

    for slug, entity_path in entity_files:
        output_file = evaluations_dir / f"{slug}-eval.md"
        if output_file.exists():
            already_evaluated += 1
            continue

        if not entity_path.exists():
            print(f" MISSING: {entity_path.name}")
            errors += 1
            continue

        entity_text = entity_path.read_text()
        source_chapter = self._extract_source_chapter_from_entity(entity_text)

        # Per-entity macro bindings.
        self.bind_macro_artifact(self.spaces["entities"], "entity_content", entity_text)
        self.bind_macro_artifact(self.spaces["sources"], "source_chapter", source_chapter)

        prompt = self.resolve_and_compile(
            "evaluate-entity",
            ["entities", "sources", "vsm-reference", "guidelines"],
        )
        if not prompt:
            print(f" FAILED to compile prompt for {slug}")
            errors += 1
            continue

        # The prompt is only saved while the evaluation output is missing.
        if not output_file.exists():
            (evaluations_dir / f"{slug}-eval-prompt.md").write_text(prompt)

        if not self.llm_adapter:
            # Manual mode: a written prompt counts as done for this run.
            print(f" {slug}: prompt written, awaiting manual evaluation")
            completed += 1
            continue

        print(f" Evaluating: {slug}...")
        if self._execute_llm(prompt, output_file, f"eval:{slug}", max_tokens=1024):
            completed += 1
        else:
            errors += 1

    total = completed + already_evaluated + errors
    print(f"\nEvaluation complete: {completed} done, {already_evaluated} skipped (existing), {errors} failed — {total} total")
|
|
|
|
# ── Chapter Processing ───────────────────────────────────────────
|
|
|
|
def process_chapter(self, chapter_id: str, auto_commit: bool = True):
    """Run the three pipeline stages for one chapter.

    The chapter text is read from ``artifacts/sources/<chapter_id>.md``.
    The run stops ("pauses") as soon as a stage returns ``None``; re-running
    later picks up any outputs that exist by then. After the third stage,
    dependency edges are recorded and, if *auto_commit* is true, the outputs
    are committed to git.
    """
    source_file = self.example_dir / "artifacts" / "sources" / f"{chapter_id}.md"
    if not source_file.exists():
        print(f"ERROR: Source file not found: {source_file}")
        return

    def _pause(what: str) -> None:
        print(f"\n Pipeline paused. Generate {what} output and re-run.")

    print(f"Processing: {chapter_id}")
    print("=" * 60)

    chapter_content = source_file.read_text()

    # Register the chapter source itself as an artifact.
    self.load_or_create_artifact(
        self.spaces["sources"], source_file, ArtifactType.CONTENT
    )

    # Stage 1: extract entities.
    entities = self.stage_extract_entities(chapter_id, chapter_content)
    if entities is None:
        _pause("entities")
        return

    # Stage 2: map to VSM.
    mappings = self.stage_map_to_vsm(chapter_id, entities)
    if mappings is None:
        _pause("mappings")
        return

    # Stage 3: synthesize analysis.
    analysis = self.stage_synthesize_analysis(
        chapter_id, chapter_content, entities, mappings
    )
    if analysis is None:
        _pause("analysis")
        return

    print(f"\n Chapter {chapter_id} fully processed.")

    self._record_chapter_dependencies(chapter_id)
    if auto_commit:
        self._git_commit_chapter(chapter_id)
|
|
|
|
def _record_chapter_dependencies(self, chapter_id: str):
    """Record source -> run -> output dependency edges for a chapter.

    A manifest for the run id ``run-<chapter_id>`` receives a "requires"
    edge from the source artifact and a "generates" edge to each stored
    output artifact (entities, mappings, analysis); artifacts that are not
    found are left out. Failures while persisting are reported as a warning
    and do not propagate.
    """
    run_id = f"run-{chapter_id}"
    manifest = RunManifest.create(
        run_id=run_id,
        template_id="extract-entities",
        template_name="extract-entities",
        template_digest="",
    )

    # Source → Run
    source = self.artifact_repo.get_by_name(self.spaces["sources"], chapter_id)
    if source:
        manifest.add_dependency_edge(source.id, run_id, "requires")

    # Run → Outputs; each pair is (space key, artifact-name suffix).
    outputs = (
        ("entities", "entities"),
        ("mappings", "mappings"),
        ("analyses", "analysis"),
    )
    for space_key, name_suffix in outputs:
        produced = self.artifact_repo.get_by_name(
            self.spaces[space_key], f"{chapter_id}-{name_suffix}"
        )
        if produced:
            manifest.add_dependency_edge(run_id, produced.id, "generates")

    try:
        edges = self.graph_builder.persist_edges(manifest)
        print(f" Recorded {len(edges)} dependency edges.")
    except Exception as e:
        print(f" Warning: Could not record dependencies: {e}")
|
|
|
|
def _git_commit_chapter(self, chapter_id: str):
    """Stage the example's ``output`` directory and commit it for one chapter.

    This step is best-effort: if either git command exits non-zero (for
    example because there is nothing to commit), or git cannot be started at
    all, a warning is printed and the method returns normally.

    Args:
        chapter_id: Chapter identifier used in the commit message.
    """
    output_dir = self.example_dir / "output"
    commit_message = (
        f"infospace: process {chapter_id}\n\n"
        f"Extract entities, map to VSM, and synthesize analysis\n"
        f"for {chapter_id}."
    )
    try:
        subprocess.run(
            ["git", "add", str(output_dir)],
            cwd=str(self.example_dir),
            check=True,
            capture_output=True,
        )
        subprocess.run(
            ["git", "commit", "-m", commit_message],
            cwd=str(project_root),
            check=True,
            capture_output=True,
        )
        print(f" Git commit: infospace: process {chapter_id}")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing git executable or an unusable working
        # directory; without it the "skipped" path would crash the pipeline.
        print(f" Warning: Git commit skipped ({e})")
|
|
|
|
# ── Listing ──────────────────────────────────────────────────────
|
|
|
|
def list_chapters(self):
    """Print a status table for every chapter source, then entity totals.

    One row is printed per ``*.md`` file in ``artifacts/sources``. The
    Entities column shows ``done (N)``, where N is the number of file includes
    found in the chapter's ``<chapter>-entities.md`` view, or ``-`` when the
    view is missing or contains no includes. The Mappings and Analysis columns
    show ``done`` when the corresponding output file exists.
    """
    sources_dir = self.example_dir / "artifacts" / "sources"
    output_dir = self.example_dir / "output"
    chapters = sorted(f.stem for f in sources_dir.glob("*.md"))

    print(f"Available chapters ({len(chapters)}):\n")
    print(f" {'Chapter':<30} {'Entities':<12} {'Mappings':<12} {'Analysis':<12}")
    print(f" {'-'*30} {'-'*12} {'-'*12} {'-'*12}")

    for ch in chapters:
        view_file = self._entities_dir() / f"{ch}-entities.md"
        entity_count = 0
        if view_file.exists():
            # Read the view once and reuse the text for both the marker check
            # and the include parse (it used to be read from disk twice).
            view_text = view_file.read_text()
            if "{{ include" in view_text:
                # Imported lazily so listing works without the parser when no
                # view contains include directives.
                from markitect.packaging.transclusion.directives import DirectiveParser
                entity_count = len(DirectiveParser.extract_file_includes(view_text))
        entities = f"done ({entity_count})" if entity_count else "-"
        mappings = "done" if (output_dir / "mappings" / f"{ch}-mappings.md").exists() else "-"
        analysis = "done" if (output_dir / "analyses" / f"{ch}-analysis.md").exists() else "-"
        print(f" {ch:<30} {entities:<12} {mappings:<12} {analysis:<12}")

    total_entities = len(self._list_existing_entity_names())
    if total_entities:
        print(f"\n Canonical entity set: {total_entities} unique entities")
    # NOTE(review): the archive summary is treated as independent of the
    # canonical count above — confirm against the intended nesting.
    archive = self._archive_dir()
    if archive.exists():
        archived = len(list(archive.glob("*.md")))
        if archived:
            print(f" Archived entities: {archived}")
|
|
|
|
# ── Statistics ───────────────────────────────────────────────────
|
|
|
|
def show_stats(self):
    """Print the dependency-graph statistics reported by the query service.

    Any failure while fetching or printing the figures is reported as a
    "(No data yet: ...)" line instead of being raised; figures printed before
    the failure stay on screen.
    """
    print("\nDependency Statistics:")

    # (label shown to the user, key in the stats mapping), in display order.
    rows = (
        ("Nodes", "total_nodes"),
        ("Edges", "total_edges"),
        ("Root artifacts", "root_count"),
        ("Leaf artifacts", "leaf_count"),
        ("Has cycles", "has_cycles"),
    )
    try:
        stats = self.query_service.get_dependency_stats()
        for label, key in rows:
            # Look up and print one figure at a time so a missing key stops
            # the output exactly where it would have stopped before.
            print(f" {label}: {stats[key]}")
    except Exception as err:
        print(f" (No data yet: {err})")
|
|
|
|
|
|
# ── Infospace tooling integration ─────────────────────────────────
|
|
|
|
|
|
def _load_infospace(example_dir: Path):
    """Load ``infospace.yaml`` from *example_dir* and parse its entities.

    Prints an error and exits the process with status 1 when the
    configuration file is missing.

    Returns:
        A ``(config, config_path, entities)`` tuple. ``entities`` is an empty
        list when the configured entities directory does not exist.
    """
    from markitect.infospace.config import load_infospace_config
    from markitect.infospace.entity_parser import parse_entity_directory

    yaml_path = example_dir / "infospace.yaml"
    if not yaml_path.is_file():
        print("Error: No infospace.yaml found. Create one first.")
        sys.exit(1)

    settings = load_infospace_config(yaml_path)
    entity_root = example_dir / settings.entities_dir
    if entity_root.is_dir():
        parsed = parse_entity_directory(entity_root)
    else:
        parsed = []
    return settings, yaml_path, parsed
|
|
|
|
|
|
def _run_infospace_status(example_dir: Path):
    """Print a short summary of the infospace and of chapter progress.

    The summary covers the topic name, domain and entity count, the domain
    and discipline lists when they are non-empty, and how many chapter
    sources already have an analysis file under ``output/analyses``.
    """
    from markitect.infospace.state import build_state

    config, _config_path, entities = _load_infospace(example_dir)
    state = build_state(config, entities=entities)

    print(f"Infospace: {state.topic_name}")
    print(f"Domain: {config.topic.domain}")
    print(f"Entities: {state.entity_count}")
    if state.domains:
        domain_list = ", ".join(state.domains)
        print(f"Domains: {domain_list}")
    if config.disciplines:
        discipline_list = ", ".join([d.name for d in config.disciplines])
        print(f"Disciplines: {discipline_list}")

    # Show processing progress: a chapter counts as processed once its
    # analysis file exists.
    chapter_sources = example_dir / "artifacts" / "sources"
    analyses_dir = example_dir / "output" / "analyses"
    total_chapters = len(list(chapter_sources.glob("*.md")))
    processed = len(list(analyses_dir.glob("*-analysis.md")))
    print(f"Chapters: {processed}/{total_chapters} processed")
|
|
|
|
|
|
def _run_infospace_check(example_dir: Path):
    """Run the collection-level checks, print the report, and record a snapshot.

    Returns early with a message when the infospace has no entities.
    Otherwise each concern in the report is printed with its fields, followed
    by a sorted metrics summary and the id of the recorded snapshot.
    """
    from markitect.infospace.checks import run_all_checks
    from markitect.infospace.history import record_check_results

    config, _config_path, entities = _load_infospace(example_dir)
    if not entities:
        print("No entities to check.")
        return

    entity_total = len(entities)
    print(f"Running collection checks on {entity_total} entities...\n")
    report = run_all_checks(entities=entities)

    for concern_name, concern_data in report.to_dict().items():
        label = concern_data.get("concern", concern_name.upper())
        print(f" {label} — {concern_name}")
        # The "concern" entry is already shown in the heading line above.
        for field, value in concern_data.items():
            if field != "concern":
                print(f" {field}: {value}")
        print()

    metrics = report.metrics()
    if metrics:
        print("Metrics summary:")
        for name, value in sorted(metrics.items()):
            print(f" {name}: {value:.4f}")
        # NOTE(review): the snapshot is recorded only when the report yields
        # metrics — confirm this nesting matches the intended behaviour.
        snapshot = record_check_results(
            report, config, example_dir, entity_count=entity_total
        )
        print(f"\nRecorded snapshot {snapshot.snapshot_id}")
|
|
|
|
|
|
def _run_infospace_viability(example_dir: Path):
    """Print the viability dashboard for the infospace.

    With no thresholds configured a notice is printed. With thresholds but no
    recorded metrics, the configured thresholds are listed alongside a hint to
    run ``--infospace-check``. Otherwise one row per viability result is
    printed with its value, bounds and PASS/FAIL status, followed by the
    overall verdict.
    """
    from markitect.infospace.history import read_metrics_file
    from markitect.infospace.state import build_state

    def describe_bounds(threshold):
        # Render whichever of the min/max limits are set, e.g. "min=0.5, max=0.9".
        parts = []
        if threshold.min is not None:
            parts.append(f"min={threshold.min}")
        if threshold.max is not None:
            parts.append(f"max={threshold.max}")
        return ", ".join(parts)

    config, _config_path, entities = _load_infospace(example_dir)

    if not config.viability:
        print("No viability thresholds configured.")
        return

    metrics = read_metrics_file(example_dir / config.metrics_dir / "metrics.yaml")
    if not metrics:
        print("No metrics available. Run --infospace-check first.")
        print("\nConfigured thresholds:")
        for name, threshold in config.viability.items():
            print(f" {name}: {describe_bounds(threshold)}")
        return

    state = build_state(config, entities=entities, metrics=metrics)

    print(f"{'Metric':<30} {'Value':>8} {'Threshold':>15} {'Status':>8}")
    print("-" * 63)
    for result in state.viability_results:
        limits = describe_bounds(result.threshold)
        verdict = "PASS" if result.passed else "FAIL"
        print(f"{result.metric:<30} {result.value:>8.4f} {limits:>15} {verdict:>8}")

    print()
    answer = "YES" if state.is_viable else "NO"
    print(
        f"Viable: {answer} "
        f"({state.viability_pass_count}/{state.viability_total_count} thresholds met)"
    )
|
|
|
|
|
|
def main():
    """Parse the command line and run the selected pipeline action.

    Exactly one action flag is required. The chapter-processing, listing,
    metrics, stats and archive actions finish by printing the dependency
    statistics once; the infospace and evaluate actions, and a ``--book``
    request that matches no chapters, return without printing them.

    Raises:
        SystemExit: Raised by argparse for invalid arguments, including
            ``--archive-entity`` given without ``--reason``.
    """
    parser = _build_cli_parser()
    args = parser.parse_args()

    # Reject an incomplete archive request before any adapter or processor is
    # built, so an invalid invocation has no side effects.
    if args.archive_entity and not args.reason:
        parser.error("--archive-entity requires --reason")

    # Build optional LLM adapter
    provider_defaults = {
        "openrouter": "arcee-ai/trinity-large-preview:free",
    }
    llm_adapter = None
    if args.provider:
        from markitect.llm import create_adapter
        model = args.model or provider_defaults.get(args.provider)
        llm_adapter = create_adapter(args.provider, model=model)
        print(f"LLM: {args.provider} ({model or 'default'})")

    example_dir = Path(__file__).parent
    processor = ChapterProcessor(example_dir, llm_adapter=llm_adapter)
    processor.setup()

    auto_commit = not args.no_commit
    sources_dir = example_dir / "artifacts" / "sources"

    if args.archive_entity:
        processor.archive_entity(args.archive_entity, args.reason)
    elif args.list:
        processor.list_chapters()
    elif args.stats:
        # Nothing to do here: the show_stats() call at the end of this
        # function prints the statistics. Calling it in this branch as well
        # printed them twice.
        pass
    elif args.metrics:
        processor.assess_metrics()
    elif args.chapter:
        processor.process_chapter(args.chapter, auto_commit=auto_commit)
    elif args.book is not None:
        # Compare against None so that "--book 0" reports "No chapters found"
        # instead of silently falling through as if no action was requested.
        chapters = sorted(
            f.stem for f in sources_dir.glob(f"book-{args.book}-chapter-*.md")
        )
        if not chapters:
            print(f"No chapters found for Book {args.book}")
            return
        print(f"Processing {len(chapters)} chapters from Book {args.book}\n")
        _process_chapter_batch(processor, chapters, auto_commit)
    elif args.all:
        chapters = sorted(f.stem for f in sources_dir.glob("*.md"))
        print(f"Processing all {len(chapters)} chapters\n")
        _process_chapter_batch(processor, chapters, auto_commit)
    elif args.infospace_status:
        _run_infospace_status(example_dir)
        return
    elif args.infospace_check:
        _run_infospace_check(example_dir)
        return
    elif args.infospace_viability:
        _run_infospace_viability(example_dir)
        return
    elif args.evaluate:
        processor.evaluate_entities(chapter_id=args.eval_chapter)
        return

    processor.show_stats()


def _build_cli_parser():
    """Create the argument parser: one required action flag plus shared options."""
    parser = argparse.ArgumentParser(
        description="Process Wealth of Nations chapters through VSM analysis pipeline"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--chapter", type=str, help="Process a single chapter (e.g., book-1-chapter-01)")
    group.add_argument("--book", type=int, help="Process all chapters in a book (1-5)")
    group.add_argument("--all", action="store_true", help="Process all chapters")
    group.add_argument("--metrics", action="store_true", help="Assess metrics only")
    group.add_argument("--list", action="store_true", help="List available chapters")
    group.add_argument("--stats", action="store_true", help="Show dependency statistics")
    group.add_argument("--archive-entity", type=str, metavar="SLUG",
                       help="Archive an entity (move to archive/ with reason)")
    group.add_argument("--infospace-status", action="store_true",
                       help="Show infospace status via infospace tooling")
    group.add_argument("--infospace-check", action="store_true",
                       help="Run collection-level quality checks (C1-C5)")
    group.add_argument("--infospace-viability", action="store_true",
                       help="Show viability dashboard")
    group.add_argument("--evaluate", action="store_true",
                       help="Evaluate entity quality using the evaluate-entity template")

    parser.add_argument("--reason", type=str, default=None,
                        help="Reason for archiving (used with --archive-entity)")
    parser.add_argument("--eval-chapter", type=str, default=None, metavar="CHAPTER_ID",
                        help="Limit --evaluate to entities from a specific chapter")
    parser.add_argument("--no-commit", action="store_true", help="Skip git commits")
    parser.add_argument(
        "--provider",
        type=str,
        choices=["openrouter", "claude-code", "gemini", "openai"],
        default=None,
        help="LLM provider for auto-generating outputs (omit for manual mode)",
    )
    parser.add_argument("--model", type=str, default=None, help="Model name to pass to the LLM provider")
    return parser


def _process_chapter_batch(processor, chapters, auto_commit):
    """Run the pipeline for each chapter id in order, printing a blank line after each."""
    for ch in chapters:
        processor.process_chapter(ch, auto_commit=auto_commit)
        print()
|
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|