Add infospace.yaml declaring topic, disciplines, schemas, viability thresholds. Integrate infospace tooling into process_chapters.py with --infospace-status, --infospace-check, and --infospace-viability flags. Initial check: 85 entities, 4/5 viable (coverage 0.36 < 0.50 — only 7/35 chapters processed so far). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1073 lines
44 KiB
Python
1073 lines
44 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Infospace with History — Chapter Processing Pipeline
|
|
|
|
Processes chapters from Adam Smith's "The Wealth of Nations" through a
|
|
three-stage analysis pipeline, mapping economic content to Stafford Beer's
|
|
Viable System Model.
|
|
|
|
Pipeline per chapter:
|
|
1. extract-entities — Extract economic entities from chapter text
|
|
2. map-to-vsm — Map entities to VSM concepts
|
|
3. synthesize-analysis — Produce chapter-level VSM analysis
|
|
|
|
After all chapters:
|
|
4. assess-metrics — Evaluate completeness and consistency
|
|
|
|
Usage:
|
|
# Process a single chapter
|
|
python process_chapters.py --chapter book-1-chapter-01
|
|
|
|
# Process all chapters in Book I
|
|
python process_chapters.py --book 1
|
|
|
|
# Process all chapters
|
|
python process_chapters.py --all
|
|
|
|
# Assess metrics only (after chapters have been processed)
|
|
python process_chapters.py --metrics
|
|
|
|
# List available chapters
|
|
python process_chapters.py --list
|
|
"""
|
|
|
|
import argparse
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Add project root to path
|
|
project_root = Path(__file__).parent.parent.parent
|
|
sys.path.insert(0, str(project_root))
|
|
|
|
from markitect.prompts.models import Artifact, ArtifactType
|
|
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
|
|
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
|
|
from markitect.prompts.services.artifact_service import ArtifactService
|
|
from markitect.prompts.templates.models import PromptTemplate
|
|
from markitect.prompts.templates.analyzer import TemplateAnalyzer
|
|
from markitect.prompts.resolver.resolver import PromptResolver
|
|
from markitect.prompts.resolver.compiler import ContextCompiler
|
|
from markitect.prompts.resolver.strategy import ResolutionConfig, MultiSpaceResolutionStrategy
|
|
from markitect.prompts.execution.manifest import RunManifest
|
|
from markitect.prompts.dependencies.graph import GraphBuilder
|
|
from markitect.prompts.traceability.service import TraceabilityService
|
|
from markitect.prompts.queries.operations import PromptQueryService
|
|
|
|
|
|
class ChapterProcessor:
|
|
"""Processes Wealth of Nations chapters through the VSM analysis pipeline."""
|
|
|
|
def __init__(
    self,
    example_dir: Path,
    db_path: Optional[str] = None,
    llm_adapter=None,
):
    """Create the processor and wire up its repositories and services.

    Args:
        example_dir: Root directory of the example; every input and
            output path used by this class is built relative to it.
        db_path: Path of the SQLite database. When falsy,
            ``infospace.db`` inside *example_dir* is used.
        llm_adapter: Optional adapter object. The pipeline stages only
            call the LLM when this is truthy.
    """
    self.example_dir = example_dir
    self.db_path = db_path if db_path else str(example_dir / "infospace.db")
    self.llm_adapter = llm_adapter

    # Persistence layer: both repositories are opened on the same database path.
    self.artifact_repo = SQLiteArtifactRepository(self.db_path)
    self.dep_repo = SQLiteDependencyRepository(self.db_path)

    # Services layered on top of the repositories.
    self.artifact_service = ArtifactService(self.artifact_repo)
    self.graph_builder = GraphBuilder(self.dep_repo)
    shared_repos = (self.artifact_repo, self.dep_repo)
    self.trace_service = TraceabilityService(*shared_repos, db_path=self.db_path)
    self.query_service = PromptQueryService(*shared_repos, db_path=self.db_path)

    # Template analysis and compilation.
    self.analyzer = TemplateAnalyzer()
    self.compiler = ContextCompiler()

    # Short key -> information-space id; every id is "infospace-<key>".
    space_keys = (
        "templates",
        "sources",
        "guidelines",
        "vsm-reference",
        "entities",
        "mappings",
        "analyses",
        "metrics",
    )
    self.spaces = {key: f"infospace-{key}" for key in space_keys}
|
|
|
|
# ── Artifact Management ──────────────────────────────────────────
|
|
|
|
def load_or_create_artifact(
    self,
    space: str,
    filepath: Path,
    artifact_type: ArtifactType,
    name: Optional[str] = None,
) -> Artifact:
    """Return the artifact called *name* in *space*, creating it from *filepath* if absent.

    Args:
        space: Information-space id to look in / create in.
        filepath: File whose text becomes the artifact content.
        artifact_type: Type assigned to a newly created artifact.
        name: Artifact name; defaults to the file's stem.

    Returns:
        The already-stored artifact when one exists under that name
        (its content is not refreshed from the file), otherwise the
        newly created one.
    """
    artifact_name = filepath.stem if name is None else name

    # The file is read before the lookup, so an unreadable file raises
    # even when the artifact is already stored.
    text = filepath.read_text()

    stored = self.artifact_repo.get_by_name(space, artifact_name)
    if stored:
        return stored

    created = self.artifact_repo.create(
        Artifact.create(
            space_id=space,
            name=artifact_name,
            content=text,
            artifact_type=artifact_type,
        )
    )
    print(f" + {artifact_name} ({created.content_digest[:8]})")
    return created
|
|
|
|
def store_output_artifact(
    self, space: str, name: str, content: str, artifact_type: ArtifactType
) -> Artifact:
    """Persist *content* as artifact *name* in *space*, replacing any previous one.

    An artifact already stored under the same space and name is deleted
    before the new one is created.

    Returns:
        The artifact as returned by the repository's ``create`` call.
    """
    repo = self.artifact_repo

    previous = repo.get_by_name(space, name)
    if previous:
        repo.delete(previous.id)

    replacement = Artifact.create(
        space_id=space,
        name=name,
        content=content,
        artifact_type=artifact_type,
    )
    return repo.create(replacement)
|
|
|
|
def bind_macro_artifact(self, space: str, macro_name: str, content: str) -> Artifact:
    """Bind *content* to *macro_name* in *space* so templates can resolve it.

    The binding is stored as a ``CONTENT`` artifact; an existing artifact
    with the same name in that space is deleted and re-created.
    """
    # Same delete-then-create sequence as store_output_artifact, with the
    # artifact type fixed to CONTENT.
    return self.store_output_artifact(space, macro_name, content, ArtifactType.CONTENT)
|
|
|
|
# ── Setup ────────────────────────────────────────────────────────
|
|
|
|
def setup(self):
|
|
"""Load all static artifacts (templates, guidelines, VSM reference)."""
|
|
print("Loading artifacts...")
|
|
|
|
# Templates
|
|
for tmpl_file in (self.example_dir / "templates").glob("*.md"):
|
|
self.load_or_create_artifact(
|
|
self.spaces["templates"], tmpl_file, ArtifactType.TEMPLATE
|
|
)
|
|
|
|
# VSM reference
|
|
for ref_file in (self.example_dir / "artifacts" / "vsm-reference").glob("*.md"):
|
|
self.load_or_create_artifact(
|
|
self.spaces["vsm-reference"], ref_file, ArtifactType.CONTENT,
|
|
name="vsm_framework",
|
|
)
|
|
|
|
# Guidelines
|
|
guideline_name_map = {
|
|
"extraction-rules.md": "extraction_rules",
|
|
"mapping-rules.md": "mapping_rules",
|
|
}
|
|
for guide_file in (self.example_dir / "artifacts" / "guidelines").glob("*.md"):
|
|
name = guideline_name_map.get(guide_file.name, guide_file.stem)
|
|
self.load_or_create_artifact(
|
|
self.spaces["guidelines"], guide_file, ArtifactType.CONTENT, name=name
|
|
)
|
|
|
|
print(" Done.\n")
|
|
|
|
# ── Template Resolution ──────────────────────────────────────────
|
|
|
|
def resolve_and_compile(
|
|
self, template_name: str, extra_spaces: list[str]
|
|
) -> Optional[str]:
|
|
"""Resolve macros and compile a template into a final prompt string.
|
|
|
|
Uses TemplateAnalyzer to parse @{target} macros from the template,
|
|
the resolver to look up artifact content, and ContextCompiler to
|
|
assemble the final prompt.
|
|
"""
|
|
template_artifact = self.artifact_repo.get_by_name(
|
|
self.spaces["templates"], template_name
|
|
)
|
|
if not template_artifact:
|
|
print(f" ERROR: Template '{template_name}' not found")
|
|
return None
|
|
|
|
template = PromptTemplate.from_artifact(template_artifact)
|
|
template_content = template_artifact.content
|
|
|
|
# Analyze template to extract @{target} macros
|
|
self.analyzer.analyze(template, template_content)
|
|
|
|
config = ResolutionConfig(
|
|
space_id=self.spaces["templates"],
|
|
included_spaces=[self.spaces[s] for s in extra_spaces],
|
|
)
|
|
|
|
strategy = MultiSpaceResolutionStrategy()
|
|
resolver = PromptResolver(self.artifact_service, strategy)
|
|
result = resolver.resolve_template(template, config)
|
|
|
|
if not result.success:
|
|
print(f" ERROR: Resolution failed: {result.context.errors}")
|
|
return None
|
|
|
|
# Compile template with resolved content
|
|
compiled = self.compiler.compile(template, template_content, result)
|
|
return compiled.content
|
|
|
|
# ── LLM Execution Helpers ─────────────────────────────────────────
|
|
|
|
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
    """Call the LLM and return the content string, or ``None`` on failure.

    Rate-limit errors (``LLMRateLimitError``) are retried up to 3 times
    with a linearly growing delay of 15, 30 and 45 seconds. Any other
    exception is reported and ends the call immediately.
    Does **not** write any files — callers decide where to persist.

    Args:
        prompt: Fully compiled prompt text.
        stage_label: Short label used only in progress messages.
        max_tokens: Passed to ``RunConfig(max_tokens=...)``.

    Returns:
        The response content, or ``None`` when the call failed or the
        content was empty / whitespace only.
    """
    import time as _time
    from markitect.prompts.execution.models import RunConfig
    from markitect.llm.exceptions import LLMRateLimitError

    print(f" Calling LLM ({stage_label})...")
    t0 = _time.time()
    max_retries = 3
    for attempt in range(max_retries + 1):
        try:
            response = self.llm_adapter.execute_prompt(prompt, RunConfig(max_tokens=max_tokens))
            break  # success
        except LLMRateLimitError as exc:
            if attempt >= max_retries:
                print(f" LLM rate limit after {max_retries} retries ({_time.time() - t0:.1f}s): {exc}")
                return None
            wait = 15 * (attempt + 1)  # 15, 30, 45 seconds
            print(f" Rate limited, retrying in {wait}s (attempt {attempt + 1}/{max_retries})...")
            _time.sleep(wait)
        except Exception as exc:
            # Includes the AttributeError raised when no adapter is configured.
            print(f" LLM error ({_time.time() - t0:.1f}s): {exc}")
            return None

    elapsed = _time.time() - t0
    # A response without usage data must not crash the report after a
    # successful call; fall back to "?" placeholders.
    usage = getattr(response, "usage", None) or {}
    print(
        f" LLM done in {elapsed:.1f}s — "
        f"prompt {usage.get('prompt_tokens', '?')} tok, "
        f"completion {usage.get('completion_tokens', '?')} tok, "
        f"total {usage.get('total_tokens', '?')} tok"
    )

    content = response.content
    if not content or not content.strip():
        print(" LLM returned empty content")
        return None

    return content
|
|
|
|
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
|
|
"""Call the LLM, write the result to *output_file*, and return it."""
|
|
content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
|
|
if content:
|
|
output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
output_file.write_text(content)
|
|
print(f" LLM output written to {output_file.name}")
|
|
return content
|
|
|
|
# ── Entity Management (flat canonical set) ─────────────────────
|
|
|
|
@staticmethod
|
|
def _normalize_entity_name(name: str) -> str:
|
|
"""Normalize an entity name to a kebab-case filename stem."""
|
|
slug = name.lower().strip()
|
|
slug = slug.replace("_", "-").replace(" ", "-")
|
|
slug = re.sub(r"[^a-z0-9-]", "", slug)
|
|
slug = re.sub(r"-{2,}", "-", slug)
|
|
return slug.strip("-")
|
|
|
|
def _entities_dir(self) -> Path:
|
|
return self.example_dir / "output" / "entities"
|
|
|
|
def _archive_dir(self) -> Path:
|
|
return self._entities_dir() / "archive"
|
|
|
|
def _list_existing_entity_names(self) -> list[str]:
|
|
"""Return sorted slugs of all canonical entity files already on disk."""
|
|
return sorted(
|
|
f.stem
|
|
for f in self._entities_dir().glob("*.md")
|
|
if not f.name.endswith("-entities.md")
|
|
and not f.name.endswith("-prompt.md")
|
|
)
|
|
|
|
def archive_entity(self, slug: str, reason: str) -> None:
|
|
"""Move a canonical entity to the archive with a documented reason.
|
|
|
|
The entity file is prepended with an archive header explaining why
|
|
it was retired, then moved to ``output/entities/archive/<slug>.md``.
|
|
Chapter views that reference this entity are **not** updated
|
|
automatically — review and update them manually.
|
|
"""
|
|
src = self._entities_dir() / f"{slug}.md"
|
|
if not src.exists():
|
|
print(f" Entity not found: {slug}")
|
|
return
|
|
|
|
archive = self._archive_dir()
|
|
archive.mkdir(parents=True, exist_ok=True)
|
|
dest = archive / f"{slug}.md"
|
|
|
|
from datetime import date
|
|
header = (
|
|
f"<!-- ARCHIVED {date.today().isoformat()}\n"
|
|
f" Reason: {reason}\n"
|
|
f"-->\n\n"
|
|
)
|
|
content = src.read_text()
|
|
dest.write_text(header + content)
|
|
src.unlink()
|
|
|
|
# Report which chapter views still reference this entity
|
|
refs = []
|
|
for view in self._entities_dir().glob("*-entities.md"):
|
|
if f'include "{slug}.md"' in view.read_text():
|
|
refs.append(view.name)
|
|
|
|
print(f" Archived: {slug}.md -> archive/{slug}.md")
|
|
print(f" Reason: {reason}")
|
|
if refs:
|
|
print(f" Referenced by: {', '.join(refs)} (update these views)")
|
|
print(f" Canonical set: {len(self._list_existing_entity_names())} entities")
|
|
|
|
def _split_entities(
|
|
self, combined_content: str
|
|
) -> list[tuple[str, Path]]:
|
|
"""Split combined LLM output into the flat canonical entity directory.
|
|
|
|
Writes each entity to ``output/entities/<slug>.md``. If a file
|
|
with that slug already exists it is **skipped** (first-occurrence
|
|
wins), but the entity is still included in the returned list so
|
|
the chapter view can reference it.
|
|
|
|
Returns list of (entity_name, file_path) for every entity in
|
|
*combined_content* (new and pre-existing alike).
|
|
"""
|
|
entities_dir = self._entities_dir()
|
|
entities_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
parts = re.split(
|
|
r"^---\s*ENTITY:\s*(.+?)\s*---\s*$",
|
|
combined_content,
|
|
flags=re.MULTILINE,
|
|
)
|
|
|
|
entity_files: list[tuple[str, Path]] = []
|
|
new_count = 0
|
|
skipped_count = 0
|
|
|
|
for i in range(1, len(parts), 2):
|
|
entity_name = parts[i]
|
|
entity_content = parts[i + 1].strip() if i + 1 < len(parts) else ""
|
|
|
|
slug = self._normalize_entity_name(entity_name)
|
|
if not slug:
|
|
continue
|
|
|
|
file_path = entities_dir / f"{slug}.md"
|
|
if file_path.exists():
|
|
skipped_count += 1
|
|
else:
|
|
file_path.write_text(entity_content + "\n")
|
|
new_count += 1
|
|
|
|
entity_files.append((entity_name, file_path))
|
|
|
|
msg = f" {new_count} new entities written"
|
|
if skipped_count:
|
|
msg += f", {skipped_count} pre-existing (skipped)"
|
|
print(msg)
|
|
return entity_files
|
|
|
|
def _write_chapter_entity_view(
|
|
self, chapter_id: str, entity_files: list[tuple[str, Path]]
|
|
) -> Path:
|
|
"""Write a per-chapter view file that transcludes individual entities."""
|
|
parts = chapter_id.split("-")
|
|
book_num = int(parts[1]) if len(parts) >= 2 else 1
|
|
ch_num = int(parts[3]) if len(parts) >= 4 else 0
|
|
roman = {1: "I", 2: "II", 3: "III", 4: "IV", 5: "V"}.get(book_num, str(book_num))
|
|
title = f"# Economic Entities — Book {roman}, Chapter {ch_num}\n"
|
|
|
|
lines = [title]
|
|
for _name, file_path in entity_files:
|
|
lines.append(f'{{{{ include "{file_path.name}" }}}}')
|
|
lines.append("")
|
|
lines.append("---")
|
|
lines.append("")
|
|
|
|
# Remove trailing separator after last entity
|
|
if lines and lines[-1] == "" and len(lines) >= 3 and lines[-2] == "---":
|
|
lines = lines[:-2]
|
|
|
|
view_path = self._entities_dir() / f"{chapter_id}-entities.md"
|
|
view_path.write_text("\n".join(lines) + "\n")
|
|
print(f" Chapter view written to {view_path.name}")
|
|
return view_path
|
|
|
|
def _read_entities_from_view(
    self, chapter_id: str
) -> tuple[str, list[tuple[str, Path]]]:
    """Rebuild the combined entity text for a chapter from its view file.

    The include targets reported by ``DirectiveParser.extract_file_includes``
    are resolved against the entities directory; targets that do not
    exist on disk are skipped. Each remaining file is emitted as
    ``--- ENTITY: <stem> ---`` followed by its stripped text.

    Returns:
        ``(combined, entity_files)`` where *combined* is the delimited
        text (empty string when nothing was found) and *entity_files*
        lists ``(stem, path)`` for every file that was read.
    """
    from markitect.packaging.transclusion.directives import DirectiveParser

    entities_dir = self._entities_dir()
    view_text = (entities_dir / f"{chapter_id}-entities.md").read_text()

    found: list[tuple[str, Path]] = []
    sections: list[str] = []
    for rel_path in DirectiveParser.extract_file_includes(view_text):
        entity_path = entities_dir / rel_path
        if entity_path.exists():
            stem = entity_path.stem
            sections.append(f"--- ENTITY: {stem} ---\n\n{entity_path.read_text().strip()}")
            found.append((stem, entity_path))

    if not sections:
        return "", found
    return "\n\n".join(sections) + "\n", found
|
|
|
|
# ── Pipeline Stages ──────────────────────────────────────────────
|
|
|
|
def stage_extract_entities(self, chapter_id: str, chapter_content: str) -> Optional[str]:
|
|
"""Stage 1: Extract economic entities from a chapter.
|
|
|
|
Canonical entity files live in a **flat** directory
|
|
(``output/entities/<slug>.md``). Duplicates across chapters are
|
|
skipped — first occurrence wins. The per-chapter view file
|
|
(``<chapter_id>-entities.md``) is a **secondary** transclusion view
|
|
that ``{{ include }}``s each entity relevant to the chapter.
|
|
"""
|
|
print(f" [1/3] Extracting entities...")
|
|
|
|
# Bind the chapter content to the macro name
|
|
self.bind_macro_artifact(self.spaces["sources"], "chapter_text", chapter_content)
|
|
|
|
# Bind existing entity list so the LLM knows what already exists
|
|
existing = self._list_existing_entity_names()
|
|
if existing:
|
|
entity_list = "\n".join(f"- {name}" for name in existing)
|
|
else:
|
|
entity_list = "(none — this is the first chapter)"
|
|
self.bind_macro_artifact(
|
|
self.spaces["entities"], "existing_entities", entity_list
|
|
)
|
|
|
|
prompt = self.resolve_and_compile(
|
|
"extract-entities",
|
|
["sources", "guidelines", "vsm-reference", "entities"],
|
|
)
|
|
if not prompt:
|
|
return None
|
|
|
|
# Write compiled prompt for inspection
|
|
prompt_file = self._entities_dir() / f"{chapter_id}-prompt.md"
|
|
prompt_file.parent.mkdir(parents=True, exist_ok=True)
|
|
prompt_file.write_text(prompt)
|
|
print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")
|
|
|
|
view_file = self._entities_dir() / f"{chapter_id}-entities.md"
|
|
|
|
# ── PRIMARY: chapter view with transclusion already on disk ──
|
|
if view_file.exists() and "{{ include" in view_file.read_text():
|
|
content, entity_files = self._read_entities_from_view(chapter_id)
|
|
self.store_output_artifact(
|
|
self.spaces["entities"],
|
|
f"{chapter_id}-entities",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
print(f" Found chapter view referencing {len(entity_files)} entities")
|
|
return content
|
|
|
|
# ── MIGRATION: per-chapter subdirectory (previous format) ──
|
|
subdir = self._entities_dir() / chapter_id
|
|
if subdir.is_dir() and list(subdir.glob("*.md")):
|
|
print(f" Migrating per-chapter subdir: {chapter_id}/")
|
|
entity_files: list[tuple[str, Path]] = []
|
|
entities_dir = self._entities_dir()
|
|
for src in sorted(subdir.glob("*.md")):
|
|
dest = entities_dir / src.name
|
|
if not dest.exists():
|
|
src.rename(dest)
|
|
entity_files.append((src.stem, dest))
|
|
# Clean up empty subdir
|
|
if not list(subdir.glob("*")):
|
|
subdir.rmdir()
|
|
self._write_chapter_entity_view(chapter_id, entity_files)
|
|
content = self._read_entities_from_view(chapter_id)[0]
|
|
self.store_output_artifact(
|
|
self.spaces["entities"],
|
|
f"{chapter_id}-entities",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
return content
|
|
|
|
# ── MIGRATION: legacy combined file (pre-split format) ──
|
|
if view_file.exists():
|
|
raw = view_file.read_text()
|
|
if "--- ENTITY:" in raw:
|
|
print(f" Migrating legacy combined file: {view_file.name}")
|
|
entity_files = self._split_entities(raw)
|
|
self._write_chapter_entity_view(chapter_id, entity_files)
|
|
self.store_output_artifact(
|
|
self.spaces["entities"],
|
|
f"{chapter_id}-entities",
|
|
raw,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
return raw
|
|
|
|
# ── GENERATE: call LLM, persist individual files first ──
|
|
if self.llm_adapter and prompt:
|
|
combined = self._call_llm(prompt, "entities")
|
|
if combined:
|
|
entity_files = self._split_entities(combined)
|
|
self._write_chapter_entity_view(chapter_id, entity_files)
|
|
self.store_output_artifact(
|
|
self.spaces["entities"],
|
|
f"{chapter_id}-entities",
|
|
combined,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
return combined
|
|
|
|
print(f" Awaiting entity files in: output/entities/")
|
|
return None
|
|
|
|
def stage_map_to_vsm(self, chapter_id: str, entities_content: str) -> Optional[str]:
|
|
"""Stage 2: Map extracted entities to VSM concepts."""
|
|
print(f" [2/3] Mapping to VSM...")
|
|
|
|
self.bind_macro_artifact(self.spaces["entities"], "entities", entities_content)
|
|
|
|
prompt = self.resolve_and_compile(
|
|
"map-to-vsm", ["entities", "vsm-reference", "guidelines"]
|
|
)
|
|
if not prompt:
|
|
return None
|
|
|
|
prompt_file = self.example_dir / "output" / "mappings" / f"{chapter_id}-prompt.md"
|
|
prompt_file.write_text(prompt)
|
|
print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")
|
|
|
|
output_file = self.example_dir / "output" / "mappings" / f"{chapter_id}-mappings.md"
|
|
if output_file.exists():
|
|
content = output_file.read_text()
|
|
self.store_output_artifact(
|
|
self.spaces["mappings"],
|
|
f"{chapter_id}-mappings",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
print(f" Found existing output: {output_file.name}")
|
|
return content
|
|
|
|
if self.llm_adapter and prompt:
|
|
content = self._execute_llm(prompt, output_file, "mappings")
|
|
if content:
|
|
self.store_output_artifact(
|
|
self.spaces["mappings"],
|
|
f"{chapter_id}-mappings",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
return content
|
|
|
|
print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
|
|
return None
|
|
|
|
def stage_synthesize_analysis(
|
|
self, chapter_id: str, chapter_content: str, entities_content: str, mappings_content: str
|
|
) -> Optional[str]:
|
|
"""Stage 3: Synthesize chapter-level VSM analysis."""
|
|
print(f" [3/3] Synthesizing analysis...")
|
|
|
|
self.bind_macro_artifact(self.spaces["sources"], "chapter_text", chapter_content)
|
|
self.bind_macro_artifact(self.spaces["entities"], "entities", entities_content)
|
|
self.bind_macro_artifact(self.spaces["mappings"], "mappings", mappings_content)
|
|
|
|
prompt = self.resolve_and_compile(
|
|
"synthesize-analysis",
|
|
["sources", "entities", "mappings", "vsm-reference"],
|
|
)
|
|
if not prompt:
|
|
return None
|
|
|
|
prompt_file = self.example_dir / "output" / "analyses" / f"{chapter_id}-prompt.md"
|
|
prompt_file.write_text(prompt)
|
|
print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")
|
|
|
|
output_file = self.example_dir / "output" / "analyses" / f"{chapter_id}-analysis.md"
|
|
if output_file.exists():
|
|
content = output_file.read_text()
|
|
self.store_output_artifact(
|
|
self.spaces["analyses"],
|
|
f"{chapter_id}-analysis",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
print(f" Found existing output: {output_file.name}")
|
|
return content
|
|
|
|
if self.llm_adapter and prompt:
|
|
content = self._execute_llm(prompt, output_file, "analysis")
|
|
if content:
|
|
self.store_output_artifact(
|
|
self.spaces["analyses"],
|
|
f"{chapter_id}-analysis",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
return content
|
|
|
|
print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
|
|
return None
|
|
|
|
# ── Metrics ──────────────────────────────────────────────────────
|
|
|
|
def assess_metrics(self) -> Optional[str]:
|
|
"""Run the assess-metrics template across all completed analyses."""
|
|
print("Assessing metrics...")
|
|
|
|
analyses_dir = self.example_dir / "output" / "analyses"
|
|
analysis_files = sorted(analyses_dir.glob("*-analysis.md"))
|
|
|
|
if not analysis_files:
|
|
print(" No completed analyses found. Process chapters first.")
|
|
return None
|
|
|
|
# Concatenate all analyses
|
|
all_analyses = []
|
|
for f in analysis_files:
|
|
all_analyses.append(f"<!-- Source: {f.name} -->\n{f.read_text()}")
|
|
combined = "\n\n---\n\n".join(all_analyses)
|
|
|
|
self.bind_macro_artifact(self.spaces["analyses"], "all_analyses", combined)
|
|
|
|
prompt = self.resolve_and_compile(
|
|
"assess-metrics", ["analyses", "vsm-reference"]
|
|
)
|
|
if not prompt:
|
|
return None
|
|
|
|
prompt_file = self.example_dir / "output" / "metrics" / "metrics-prompt.md"
|
|
prompt_file.write_text(prompt)
|
|
print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")
|
|
|
|
output_file = self.example_dir / "output" / "metrics" / "metrics-report.md"
|
|
if output_file.exists():
|
|
content = output_file.read_text()
|
|
self.store_output_artifact(
|
|
self.spaces["metrics"],
|
|
"metrics-report",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
print(f" Found existing output: {output_file.name}")
|
|
return content
|
|
|
|
if self.llm_adapter and prompt:
|
|
content = self._execute_llm(prompt, output_file, "metrics")
|
|
if content:
|
|
self.store_output_artifact(
|
|
self.spaces["metrics"],
|
|
"metrics-report",
|
|
content,
|
|
ArtifactType.GENERATED,
|
|
)
|
|
return content
|
|
|
|
print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
|
|
return None
|
|
|
|
# ── Chapter Processing ───────────────────────────────────────────
|
|
|
|
def process_chapter(self, chapter_id: str, auto_commit: bool = True):
|
|
"""Run the full pipeline for a single chapter."""
|
|
source_file = self.example_dir / "artifacts" / "sources" / f"{chapter_id}.md"
|
|
if not source_file.exists():
|
|
print(f"ERROR: Source file not found: {source_file}")
|
|
return
|
|
|
|
print(f"Processing: {chapter_id}")
|
|
print(f"{'=' * 60}")
|
|
|
|
chapter_content = source_file.read_text()
|
|
|
|
# Store source artifact
|
|
self.load_or_create_artifact(
|
|
self.spaces["sources"], source_file, ArtifactType.CONTENT
|
|
)
|
|
|
|
# Stage 1: Extract entities
|
|
entities = self.stage_extract_entities(chapter_id, chapter_content)
|
|
if entities is None:
|
|
print(f"\n Pipeline paused. Generate entities output and re-run.")
|
|
return
|
|
|
|
# Stage 2: Map to VSM
|
|
mappings = self.stage_map_to_vsm(chapter_id, entities)
|
|
if mappings is None:
|
|
print(f"\n Pipeline paused. Generate mappings output and re-run.")
|
|
return
|
|
|
|
# Stage 3: Synthesize analysis
|
|
analysis = self.stage_synthesize_analysis(
|
|
chapter_id, chapter_content, entities, mappings
|
|
)
|
|
if analysis is None:
|
|
print(f"\n Pipeline paused. Generate analysis output and re-run.")
|
|
return
|
|
|
|
print(f"\n Chapter {chapter_id} fully processed.")
|
|
|
|
# Record dependency edges
|
|
self._record_chapter_dependencies(chapter_id)
|
|
|
|
# Git commit
|
|
if auto_commit:
|
|
self._git_commit_chapter(chapter_id)
|
|
|
|
def _record_chapter_dependencies(self, chapter_id: str):
    """Persist dependency edges linking a chapter's source, run and outputs.

    A manifest with run id ``run-<chapter_id>`` is built: the source
    artifact (if stored) gets a ``requires`` edge to the run, and each of
    the entities / mappings / analysis artifacts that exists gets a
    ``generates`` edge from the run. Failure to persist the edges is
    reported as a warning, not raised.
    """
    run_id = f"run-{chapter_id}"
    manifest = RunManifest.create(
        run_id=run_id,
        template_id="extract-entities",
        template_name="extract-entities",
        template_digest="",
    )

    # Source -> Run
    source = self.artifact_repo.get_by_name(self.spaces["sources"], chapter_id)
    if source:
        manifest.add_dependency_edge(source.id, run_id, "requires")

    # Run -> Outputs; pairs are (space key, artifact-name suffix).
    outputs = (
        ("entities", "entities"),
        ("mappings", "mappings"),
        ("analyses", "analysis"),
    )
    for space_key, suffix in outputs:
        produced = self.artifact_repo.get_by_name(
            self.spaces[space_key], f"{chapter_id}-{suffix}"
        )
        if produced:
            manifest.add_dependency_edge(run_id, produced.id, "generates")

    try:
        edges = self.graph_builder.persist_edges(manifest)
        print(f" Recorded {len(edges)} dependency edges.")
    except Exception as e:
        print(f" Warning: Could not record dependencies: {e}")
|
|
|
|
def _git_commit_chapter(self, chapter_id: str):
    """Commit chapter outputs to git (best effort).

    Runs ``git add`` on the example's ``output`` directory followed by
    ``git commit``. Any failure — a non-zero exit status (for example
    nothing to commit) or git not being runnable at all — is reported as
    a warning and never raised, so a finished chapter is not lost to a
    version-control problem.
    """
    output_dir = self.example_dir / "output"
    message = (
        f"infospace: process {chapter_id}\n\n"
        f"Extract entities, map to VSM, and synthesize analysis\n"
        f"for {chapter_id}."
    )
    try:
        subprocess.run(
            ["git", "add", str(output_dir)],
            cwd=str(self.example_dir),
            check=True,
            capture_output=True,
        )
        subprocess.run(
            ["git", "commit", "-m", message],
            cwd=str(project_root),
            check=True,
            capture_output=True,
        )
        print(f" Git commit: infospace: process {chapter_id}")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing git executable (FileNotFoundError) or a
        # working directory that cannot be entered.
        print(f" Warning: Git commit skipped ({e})")
|
|
|
|
# ── Listing ──────────────────────────────────────────────────────
|
|
|
|
def list_chapters(self):
|
|
"""List all available chapters and their processing status."""
|
|
sources_dir = self.example_dir / "artifacts" / "sources"
|
|
chapters = sorted(f.stem for f in sources_dir.glob("*.md"))
|
|
|
|
print(f"Available chapters ({len(chapters)}):\n")
|
|
print(f" {'Chapter':<30} {'Entities':<12} {'Mappings':<12} {'Analysis':<12}")
|
|
print(f" {'-'*30} {'-'*12} {'-'*12} {'-'*12}")
|
|
|
|
for ch in chapters:
|
|
view_file = self._entities_dir() / f"{ch}-entities.md"
|
|
entity_count = 0
|
|
if view_file.exists() and "{{ include" in view_file.read_text():
|
|
from markitect.packaging.transclusion.directives import DirectiveParser
|
|
entity_count = len(DirectiveParser.extract_file_includes(view_file.read_text()))
|
|
entities = f"done ({entity_count})" if entity_count else "-"
|
|
mappings = "done" if (self.example_dir / "output" / "mappings" / f"{ch}-mappings.md").exists() else "-"
|
|
analysis = "done" if (self.example_dir / "output" / "analyses" / f"{ch}-analysis.md").exists() else "-"
|
|
print(f" {ch:<30} {entities:<12} {mappings:<12} {analysis:<12}")
|
|
|
|
total_entities = len(self._list_existing_entity_names())
|
|
if total_entities:
|
|
print(f"\n Canonical entity set: {total_entities} unique entities")
|
|
archive = self._archive_dir()
|
|
if archive.exists():
|
|
archived = len(list(archive.glob("*.md")))
|
|
if archived:
|
|
print(f" Archived entities: {archived}")
|
|
|
|
# ── Statistics ───────────────────────────────────────────────────
|
|
|
|
def show_stats(self):
|
|
"""Show dependency graph statistics."""
|
|
print("\nDependency Statistics:")
|
|
try:
|
|
stats = self.query_service.get_dependency_stats()
|
|
print(f" Nodes: {stats['total_nodes']}")
|
|
print(f" Edges: {stats['total_edges']}")
|
|
print(f" Root artifacts: {stats['root_count']}")
|
|
print(f" Leaf artifacts: {stats['leaf_count']}")
|
|
print(f" Has cycles: {stats['has_cycles']}")
|
|
except Exception as e:
|
|
print(f" (No data yet: {e})")
|
|
|
|
|
|
# ── Infospace tooling integration ─────────────────────────────────
|
|
|
|
|
|
def _load_infospace(example_dir: Path):
    """Load ``infospace.yaml`` and the entities it points at.

    Exits the process with status 1 (after printing an error) when
    *example_dir* has no ``infospace.yaml`` file.

    Returns:
        ``(config, config_path, entities)``; *entities* is an empty list
        when the configured entities directory does not exist.
    """
    from markitect.infospace.config import load_infospace_config
    from markitect.infospace.entity_parser import parse_entity_directory

    config_path = example_dir / "infospace.yaml"
    if not config_path.is_file():
        print("Error: No infospace.yaml found. Create one first.")
        sys.exit(1)

    config = load_infospace_config(config_path)

    entities_dir = example_dir / config.entities_dir
    if entities_dir.is_dir():
        entities = parse_entity_directory(entities_dir)
    else:
        entities = []
    return config, config_path, entities
|
|
|
|
|
|
def _run_infospace_status(example_dir: Path):
    """Print an overview of the infospace plus chapter-processing progress.

    Progress is the number of ``output/analyses/*-analysis.md`` files
    against the number of ``artifacts/sources/*.md`` files.
    """
    from markitect.infospace.state import build_state

    config, _config_path, entities = _load_infospace(example_dir)
    state = build_state(config, entities=entities)

    print(f"Infospace: {state.topic_name}")
    print(f"Domain: {config.topic.domain}")
    print(f"Entities: {state.entity_count}")
    if state.domains:
        print(f"Domains: {', '.join(state.domains)}")
    if config.disciplines:
        discipline_names = [discipline.name for discipline in config.disciplines]
        print(f"Disciplines: {', '.join(discipline_names)}")

    # Processing progress: finished analyses vs. available sources.
    source_files = list((example_dir / "artifacts" / "sources").glob("*.md"))
    analysis_files = list((example_dir / "output" / "analyses").glob("*-analysis.md"))
    total_chapters = len(source_files)
    processed = len(analysis_files)
    print(f"Chapters: {processed}/{total_chapters} processed")
|
|
|
|
|
|
def _run_infospace_check(example_dir: Path):
    """Run the collection-level checks, print the report, and record a snapshot.

    Prints a notice and returns early when no entities are loaded. When the
    report yields metrics, they are printed and the results are recorded via
    ``record_check_results``.
    """
    from markitect.infospace.checks import run_all_checks
    from markitect.infospace.history import record_check_results

    config, _config_path, entities = _load_infospace(example_dir)
    if not entities:
        print("No entities to check.")
        return

    print(f"Running collection checks on {len(entities)} entities...\n")
    report = run_all_checks(entities=entities)

    # One section per concern; the "concern" key is used as the label and
    # is therefore not repeated among the detail lines.
    for concern_name, concern_data in report.to_dict().items():
        label = concern_data.get("concern", concern_name.upper())
        print(f" {label} — {concern_name}")
        for field, value in concern_data.items():
            if field != "concern":
                print(f" {field}: {value}")
        print()

    metric_values = report.metrics()
    if not metric_values:
        return

    print("Metrics summary:")
    for metric_name, metric_value in sorted(metric_values.items()):
        print(f" {metric_name}: {metric_value:.4f}")
    snapshot = record_check_results(
        report, config, example_dir, entity_count=len(entities)
    )
    print(f"\nRecorded snapshot {snapshot.snapshot_id}")
|
|
|
|
|
|
def _run_infospace_viability(example_dir: Path):
    """Print the viability dashboard for the infospace.

    Three outcomes are possible:
      * no thresholds configured: print a notice and return;
      * no recorded metrics: list the configured thresholds and return;
      * otherwise: print one row per viability result plus an overall verdict.
    """
    from markitect.infospace.history import read_metrics_file
    from markitect.infospace.state import build_state

    def bounds_text(threshold):
        # Render whichever of the min/max limits are set, e.g. "min=0.5, max=1".
        limits = (("min", threshold.min), ("max", threshold.max))
        return ", ".join(
            f"{label}={limit}" for label, limit in limits if limit is not None
        )

    config, _config_path, entities = _load_infospace(example_dir)

    if not config.viability:
        print("No viability thresholds configured.")
        return

    metrics = read_metrics_file(example_dir / config.metrics_dir / "metrics.yaml")
    if not metrics:
        print("No metrics available. Run --infospace-check first.")
        print("\nConfigured thresholds:")
        for name, threshold in config.viability.items():
            print(f" {name}: {bounds_text(threshold)}")
        return

    state = build_state(config, entities=entities, metrics=metrics)

    print(f"{'Metric':<30} {'Value':>8} {'Threshold':>15} {'Status':>8}")
    print("-" * 63)
    for result in state.viability_results:
        status_str = "PASS" if result.passed else "FAIL"
        print(f"{result.metric:<30} {result.value:>8.4f} {bounds_text(result.threshold):>15} {status_str:>8}")

    print()
    verdict = (
        f"Viable: YES ({state.viability_pass_count}/{state.viability_total_count} thresholds met)"
        if state.is_viable
        else f"Viable: NO ({state.viability_pass_count}/{state.viability_total_count} thresholds met)"
    )
    print(verdict)
|
|
|
|
|
|
def main():
    """Command-line entry point for the chapter-processing pipeline.

    Exactly one action flag is required (they are mutually exclusive):
    ``--chapter``, ``--book``, ``--all``, ``--metrics``, ``--list``,
    ``--stats``, ``--archive-entity`` or one of the ``--infospace-*`` flags.
    ``--reason``, ``--no-commit``, ``--provider`` and ``--model`` modify the
    chosen action.

    Pipeline actions finish by printing the dependency statistics; ``--stats``
    and the ``--infospace-*`` actions return without that trailing summary.

    Raises:
        SystemExit: Via ``argparse`` on invalid arguments, including
            ``--archive-entity`` given without ``--reason``.
    """
    parser = argparse.ArgumentParser(
        description="Process Wealth of Nations chapters through VSM analysis pipeline"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--chapter", type=str, help="Process a single chapter (e.g., book-1-chapter-01)")
    group.add_argument("--book", type=int, help="Process all chapters in a book (1-5)")
    group.add_argument("--all", action="store_true", help="Process all chapters")
    group.add_argument("--metrics", action="store_true", help="Assess metrics only")
    group.add_argument("--list", action="store_true", help="List available chapters")
    group.add_argument("--stats", action="store_true", help="Show dependency statistics")
    group.add_argument("--archive-entity", type=str, metavar="SLUG",
                       help="Archive an entity (move to archive/ with reason)")
    group.add_argument("--infospace-status", action="store_true",
                       help="Show infospace status via infospace tooling")
    group.add_argument("--infospace-check", action="store_true",
                       help="Run collection-level quality checks (C1-C5)")
    group.add_argument("--infospace-viability", action="store_true",
                       help="Show viability dashboard")

    parser.add_argument("--reason", type=str, default=None,
                        help="Reason for archiving (used with --archive-entity)")
    parser.add_argument("--no-commit", action="store_true", help="Skip git commits")
    parser.add_argument(
        "--provider",
        type=str,
        choices=["openrouter", "claude-code", "gemini", "openai"],
        default=None,
        help="LLM provider for auto-generating outputs (omit for manual mode)",
    )
    parser.add_argument("--model", type=str, default=None, help="Model name to pass to the LLM provider")

    args = parser.parse_args()

    # Reject an incomplete archive request up front, before an LLM adapter or
    # the processor is created, so a usage error has no side effects.
    if args.archive_entity and not args.reason:
        parser.error("--archive-entity requires --reason")

    # Build optional LLM adapter
    provider_defaults = {
        "openrouter": "arcee-ai/trinity-large-preview:free",
    }
    llm_adapter = None
    if args.provider:
        from markitect.llm import create_adapter

        model = args.model or provider_defaults.get(args.provider)
        llm_adapter = create_adapter(args.provider, model=model)
        print(f"LLM: {args.provider} ({model or 'default'})")

    example_dir = Path(__file__).parent
    processor = ChapterProcessor(example_dir, llm_adapter=llm_adapter)
    processor.setup()

    sources_dir = example_dir / "artifacts" / "sources"
    auto_commit = not args.no_commit

    def process_batch(chapters):
        # Run the per-chapter pipeline over each chapter, blank line between.
        for chapter in chapters:
            processor.process_chapter(chapter, auto_commit=auto_commit)
            print()

    if args.archive_entity:
        processor.archive_entity(args.archive_entity, args.reason)
    elif args.list:
        processor.list_chapters()
    elif args.stats:
        # Return here: falling through to the trailing summary would print
        # the same statistics a second time.
        processor.show_stats()
        return
    elif args.metrics:
        processor.assess_metrics()
    elif args.chapter:
        processor.process_chapter(args.chapter, auto_commit=auto_commit)
    elif args.book is not None:
        # Compare against None rather than truthiness so that ``--book 0`` is
        # reported as having no chapters instead of being silently ignored.
        chapters = sorted(
            f.stem for f in sources_dir.glob(f"book-{args.book}-chapter-*.md")
        )
        if not chapters:
            print(f"No chapters found for Book {args.book}")
            return
        print(f"Processing {len(chapters)} chapters from Book {args.book}\n")
        process_batch(chapters)
    elif args.all:
        chapters = sorted(f.stem for f in sources_dir.glob("*.md"))
        print(f"Processing all {len(chapters)} chapters\n")
        process_batch(chapters)
    elif args.infospace_status:
        _run_infospace_status(example_dir)
        return
    elif args.infospace_check:
        _run_infospace_check(example_dir)
        return
    elif args.infospace_viability:
        _run_infospace_viability(example_dir)
        return

    processor.show_stats()
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|