#!/usr/bin/env python3
"""
Infospace with History — Chapter Processing Pipeline

Processes chapters from Adam Smith's "The Wealth of Nations" through a
three-stage analysis pipeline, mapping economic content to Stafford Beer's
Viable System Model.

Pipeline per chapter:
  1. extract-entities    — Extract economic entities from chapter text
  2. map-to-vsm          — Map entities to VSM concepts
  3. synthesize-analysis — Produce chapter-level VSM analysis

After all chapters:
  4. assess-metrics      — Evaluate completeness and consistency

Usage:
    # Process a single chapter
    python process_chapters.py --chapter book-1-chapter-01

    # Process all chapters in Book I
    python process_chapters.py --book 1

    # Process all chapters
    python process_chapters.py --all

    # Assess metrics only (after chapters have been processed)
    python process_chapters.py --metrics

    # List available chapters
    python process_chapters.py --list

    # Show dependency graph statistics
    python process_chapters.py --stats
"""

import argparse
import subprocess
import sys
from pathlib import Path
from typing import Optional

# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
from markitect.prompts.services.artifact_service import ArtifactService
from markitect.prompts.templates.models import PromptTemplate, ContentMacro, MacroKind
from markitect.prompts.resolver.resolver import PromptResolver
from markitect.prompts.resolver.compiler import ContextCompiler
from markitect.prompts.resolver.strategy import ResolutionConfig, MultiSpaceResolutionStrategy
from markitect.prompts.execution.manifest import RunManifest
from markitect.prompts.dependencies.graph import GraphBuilder
from markitect.prompts.traceability.service import TraceabilityService
from markitect.prompts.queries.operations import PromptQueryService

# All pipeline text files are read and written with an explicit encoding so
# that results do not depend on the platform's locale default.
TEXT_ENCODING = "utf-8"


class ChapterProcessor:
    """Processes Wealth of Nations chapters through the VSM analysis pipeline."""

    def __init__(
        self,
        example_dir: Path,
        db_path: Optional[str] = None,
        llm_adapter=None,
    ):
        """Wire up repositories and services for one example directory.

        Args:
            example_dir: Directory holding ``templates/``, ``artifacts/`` and
                ``output/``.
            db_path: Database path handed to the repositories; defaults to
                ``<example_dir>/infospace.db``.
            llm_adapter: Optional adapter exposing ``execute_prompt``. When
                omitted, stages only write prompts and wait for output files
                to be supplied by hand.
        """
        self.example_dir = example_dir
        self.db_path = db_path or str(example_dir / "infospace.db")
        self.llm_adapter = llm_adapter

        # Initialize repositories
        self.artifact_repo = SQLiteArtifactRepository(self.db_path)
        self.dep_repo = SQLiteDependencyRepository(self.db_path)
        self.artifact_service = ArtifactService(self.artifact_repo)
        self.graph_builder = GraphBuilder(self.dep_repo)
        self.trace_service = TraceabilityService(
            self.artifact_repo, self.dep_repo, db_path=self.db_path
        )
        self.query_service = PromptQueryService(
            self.artifact_repo, self.dep_repo, db_path=self.db_path
        )

        # Information spaces
        self.spaces = {
            "templates": "infospace-templates",
            "sources": "infospace-sources",
            "guidelines": "infospace-guidelines",
            "vsm-reference": "infospace-vsm-reference",
            "entities": "infospace-entities",
            "mappings": "infospace-mappings",
            "analyses": "infospace-analyses",
            "metrics": "infospace-metrics",
        }

        # Content cache (repository stores metadata, we cache content)
        self.artifact_content: dict[str, str] = {}

    # ── Artifact Management ──────────────────────────────────────────

    def load_or_create_artifact(
        self,
        space: str,
        filepath: Path,
        artifact_type: ArtifactType,
        name: Optional[str] = None,
    ) -> tuple[Artifact, str]:
        """Load artifact from file, create in repo if needed.

        Args:
            space: Space id the artifact lives in.
            filepath: File whose text becomes the artifact content.
            artifact_type: Type used when a new artifact has to be created.
            name: Artifact name; defaults to the file's stem.

        Returns:
            ``(artifact, content)``. If an artifact with that name already
            exists it is returned as-is and only the in-memory content cache
            is refreshed from the file.
        """
        if name is None:
            name = filepath.stem
        content = filepath.read_text(encoding=TEXT_ENCODING)

        existing = self.artifact_repo.get_by_name(space, name)
        if existing:
            self.artifact_content[existing.id] = content
            return existing, content

        artifact = Artifact.create(
            space_id=space, name=name, content=content, artifact_type=artifact_type
        )
        artifact = self.artifact_repo.create(artifact)
        self.artifact_content[artifact.id] = content
        print(f" + {name} ({artifact.content_digest[:8]})")
        return artifact, content

    def _replace_artifact(
        self, space: str, name: str, content: str, artifact_type: ArtifactType
    ) -> Artifact:
        """Delete any artifact called *name* in *space*, then create a fresh one."""
        existing = self.artifact_repo.get_by_name(space, name)
        if existing:
            self.artifact_repo.delete(existing.id)
        artifact = Artifact.create(
            space_id=space, name=name, content=content, artifact_type=artifact_type
        )
        artifact = self.artifact_repo.create(artifact)
        self.artifact_content[artifact.id] = content
        return artifact

    def store_output_artifact(
        self, space: str, name: str, content: str, artifact_type: ArtifactType
    ) -> Artifact:
        """Store a generated output artifact, updating if it already exists."""
        return self._replace_artifact(space, name, content, artifact_type)

    def bind_macro_artifact(self, space: str, macro_name: str, content: str) -> Artifact:
        """Bind content to a macro name in a space (for template resolution)."""
        return self._replace_artifact(space, macro_name, content, ArtifactType.CONTENT)

    # ── Setup ────────────────────────────────────────────────────────

    def setup(self) -> None:
        """Load all static artifacts (templates, guidelines, VSM reference)."""
        print("Loading artifacts...")

        # Globs are sorted so the load order (and therefore which file wins
        # when several map to the same artifact name) is deterministic.

        # Templates
        for tmpl_file in sorted((self.example_dir / "templates").glob("*.md")):
            self.load_or_create_artifact(
                self.spaces["templates"], tmpl_file, ArtifactType.TEMPLATE
            )

        # VSM reference
        # NOTE(review): every file here is registered under the single name
        # "vsm_framework"; with more than one file the last one's content is
        # what ends up in the cache. Presumably only one file is expected.
        vsm_dir = self.example_dir / "artifacts" / "vsm-reference"
        for ref_file in sorted(vsm_dir.glob("*.md")):
            self.load_or_create_artifact(
                self.spaces["vsm-reference"],
                ref_file,
                ArtifactType.CONTENT,
                name="vsm_framework",
            )

        # Guidelines
        guideline_name_map = {
            "extraction-rules.md": "extraction_rules",
            "mapping-rules.md": "mapping_rules",
        }
        guidelines_dir = self.example_dir / "artifacts" / "guidelines"
        for guide_file in sorted(guidelines_dir.glob("*.md")):
            name = guideline_name_map.get(guide_file.name, guide_file.stem)
            self.load_or_create_artifact(
                self.spaces["guidelines"], guide_file, ArtifactType.CONTENT, name=name
            )

        print(" Done.\n")

    # ── Helpers ───────────────────────────────────────────────────────

    @staticmethod
    def _macro(target: str, kind: MacroKind = MacroKind.REQUIRED) -> ContentMacro:
        """Create a ContentMacro with correct raw_text for @{target} syntax."""
        return ContentMacro(kind=kind, target=target, raw_text=f"@{{{target}}}")

    # ── Template Resolution ──────────────────────────────────────────

    def resolve_and_compile(
        self, template_name: str, macros: list[ContentMacro], extra_spaces: list[str]
    ) -> Optional[str]:
        """Resolve macros and compile a template into a final prompt string.

        Uses the resolver for dependency validation, then performs content
        substitution from our local cache (since the artifact repository
        doesn't persist content — see resolver.py line 147).

        Args:
            template_name: Name of the template artifact in the templates space.
            macros: Macros the template is declared to contain.
            extra_spaces: Keys of ``self.spaces`` to search in addition to the
                templates space.

        Returns:
            The compiled prompt, or ``None`` if the template is missing or
            resolution fails (an error line is printed in both cases).
        """
        template_artifact = self.artifact_repo.get_by_name(
            self.spaces["templates"], template_name
        )
        if not template_artifact:
            print(f" ERROR: Template '{template_name}' not found")
            return None

        template = PromptTemplate.from_artifact(template_artifact)
        template.macros = macros
        template.analyzed = True

        config = ResolutionConfig(
            space_id=self.spaces["templates"],
            included_spaces=[self.spaces[s] for s in extra_spaces],
        )
        strategy = MultiSpaceResolutionStrategy()
        resolver = PromptResolver(self.artifact_service, strategy)
        result = resolver.resolve_template(template, config)

        if not result.success:
            print(f" ERROR: Resolution failed: {result.context.errors}")
            return None

        # Load template content, falling back to the file on disk when the
        # cache has nothing (or an empty string) for this template.
        template_content = self.artifact_content.get(template_artifact.id)
        if not template_content:
            template_path = self.example_dir / "templates" / f"{template_name}.md"
            template_content = template_path.read_text(encoding=TEXT_ENCODING)

        # Substitute macros with actual content from cache
        # (The resolver returns placeholders because the repo doesn't store content)
        # NOTE: substitutions run one after another, so macro-like text inside
        # already-inserted content would be substituted by a later pass.
        compiled_content = template_content
        for resolved in result.context.resolved_macros:
            if resolved.resolved and resolved.artifact:
                actual_content = self.artifact_content.get(resolved.artifact.id, "")
                compiled_content = compiled_content.replace(
                    f"@{{{resolved.macro.target}}}", actual_content
                )

        return compiled_content

    # ── LLM Execution Helper ────────────────────────────────────────

    def _execute_llm(self, prompt: str, output_file: Path, stage_label: str) -> Optional[str]:
        """Execute *prompt* via the configured LLM adapter and write the result.

        Returns the generated content, or ``None`` on failure.
        """
        from markitect.prompts.execution.models import RunConfig

        print(f" Calling LLM ({stage_label})...")
        try:
            response = self.llm_adapter.execute_prompt(prompt, RunConfig())
        except Exception as exc:
            # Best-effort boundary: an adapter failure pauses the stage
            # instead of aborting the whole run.
            print(f" LLM error: {exc}")
            return None

        content = response.content
        if not content or not content.strip():
            print(" LLM returned empty content")
            return None

        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(content, encoding=TEXT_ENCODING)
        print(f" LLM output written to {output_file.name}")
        return content

    def _finalize_stage(
        self,
        prompt: str,
        space_key: str,
        prompt_filename: str,
        artifact_name: str,
        stage_label: str,
    ) -> Optional[str]:
        """Write a stage's prompt, then reuse, generate, or await its output.

        Args:
            prompt: Compiled prompt text for the stage.
            space_key: Key into ``self.spaces``; also the ``output/``
                sub-directory name for the stage.
            prompt_filename: File name for the compiled prompt.
            artifact_name: Output artifact name; the output file is
                ``<artifact_name>.md``.
            stage_label: Short label shown while calling the LLM.

        Returns:
            The stage output, or ``None`` when no output exists yet and none
            could be generated.
        """
        stage_dir = self.example_dir / "output" / space_key
        # A fresh checkout may not have the output tree yet.
        stage_dir.mkdir(parents=True, exist_ok=True)

        # Write compiled prompt for inspection / LLM execution
        prompt_file = stage_dir / prompt_filename
        prompt_file.write_text(prompt, encoding=TEXT_ENCODING)
        print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")

        space = self.spaces[space_key]
        output_file = stage_dir / f"{artifact_name}.md"

        # Check for existing output (manual or LLM-generated)
        if output_file.exists():
            content = output_file.read_text(encoding=TEXT_ENCODING)
            self.store_output_artifact(
                space, artifact_name, content, ArtifactType.GENERATED
            )
            print(f" Found existing output: {output_file.name}")
            return content

        # Auto-generate via LLM if adapter is available
        if self.llm_adapter and prompt:
            content = self._execute_llm(prompt, output_file, stage_label)
            if content:
                self.store_output_artifact(
                    space, artifact_name, content, ArtifactType.GENERATED
                )
                return content

        print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
        return None

    # ── Pipeline Stages ──────────────────────────────────────────────

    def stage_extract_entities(self, chapter_id: str, chapter_content: str) -> Optional[str]:
        """Stage 1: Extract economic entities from a chapter."""
        print(" [1/3] Extracting entities...")

        # Bind the chapter content to the macro name
        self.bind_macro_artifact(self.spaces["sources"], "chapter_text", chapter_content)

        macros = [
            self._macro("chapter_text"),
            self._macro("extraction_rules"),
            self._macro("vsm_framework"),
        ]
        prompt = self.resolve_and_compile(
            "extract-entities", macros, ["sources", "guidelines", "vsm-reference"]
        )
        if not prompt:
            return None

        return self._finalize_stage(
            prompt,
            "entities",
            f"{chapter_id}-prompt.md",
            f"{chapter_id}-entities",
            "entities",
        )

    def stage_map_to_vsm(self, chapter_id: str, entities_content: str) -> Optional[str]:
        """Stage 2: Map extracted entities to VSM concepts."""
        print(" [2/3] Mapping to VSM...")

        self.bind_macro_artifact(self.spaces["entities"], "entities", entities_content)

        macros = [
            self._macro("entities"),
            self._macro("vsm_framework"),
            self._macro("mapping_rules"),
        ]
        prompt = self.resolve_and_compile(
            "map-to-vsm", macros, ["entities", "vsm-reference", "guidelines"]
        )
        if not prompt:
            return None

        return self._finalize_stage(
            prompt,
            "mappings",
            f"{chapter_id}-prompt.md",
            f"{chapter_id}-mappings",
            "mappings",
        )

    def stage_synthesize_analysis(
        self,
        chapter_id: str,
        chapter_content: str,
        entities_content: str,
        mappings_content: str,
    ) -> Optional[str]:
        """Stage 3: Synthesize chapter-level VSM analysis."""
        print(" [3/3] Synthesizing analysis...")

        self.bind_macro_artifact(self.spaces["sources"], "chapter_text", chapter_content)
        self.bind_macro_artifact(self.spaces["entities"], "entities", entities_content)
        self.bind_macro_artifact(self.spaces["mappings"], "mappings", mappings_content)

        macros = [
            self._macro("chapter_text"),
            self._macro("entities"),
            self._macro("mappings"),
            self._macro("vsm_framework"),
        ]
        prompt = self.resolve_and_compile(
            "synthesize-analysis",
            macros,
            ["sources", "entities", "mappings", "vsm-reference"],
        )
        if not prompt:
            return None

        return self._finalize_stage(
            prompt,
            "analyses",
            f"{chapter_id}-prompt.md",
            f"{chapter_id}-analysis",
            "analysis",
        )

    # ── Metrics ──────────────────────────────────────────────────────

    def assess_metrics(self) -> Optional[str]:
        """Run the assess-metrics template across all completed analyses."""
        print("Assessing metrics...")

        analyses_dir = self.example_dir / "output" / "analyses"
        analysis_files = sorted(analyses_dir.glob("*-analysis.md"))
        if not analysis_files:
            print(" No completed analyses found. Process chapters first.")
            return None

        # Concatenate all analyses
        sections = [
            f"\n{path.read_text(encoding=TEXT_ENCODING)}" for path in analysis_files
        ]
        combined = "\n\n---\n\n".join(sections)

        self.bind_macro_artifact(self.spaces["analyses"], "all_analyses", combined)

        macros = [
            self._macro("all_analyses"),
            self._macro("vsm_framework"),
        ]
        prompt = self.resolve_and_compile(
            "assess-metrics", macros, ["analyses", "vsm-reference"]
        )
        if not prompt:
            return None

        return self._finalize_stage(
            prompt, "metrics", "metrics-prompt.md", "metrics-report", "metrics"
        )

    # ── Chapter Processing ───────────────────────────────────────────

    def process_chapter(self, chapter_id: str, auto_commit: bool = True) -> None:
        """Run the full pipeline for a single chapter.

        Stops early (printing a "Pipeline paused" message) as soon as a stage
        has no output; re-running picks up output files that exist by then.

        Args:
            chapter_id: Stem of the chapter file under ``artifacts/sources``.
            auto_commit: Commit the output directory to git when all three
                stages completed.
        """
        source_file = self.example_dir / "artifacts" / "sources" / f"{chapter_id}.md"
        if not source_file.exists():
            print(f"ERROR: Source file not found: {source_file}")
            return

        print(f"Processing: {chapter_id}")
        print(f"{'=' * 60}")

        chapter_content = source_file.read_text(encoding=TEXT_ENCODING)

        # Store source artifact
        self.load_or_create_artifact(
            self.spaces["sources"], source_file, ArtifactType.CONTENT
        )

        # Stage 1: Extract entities
        entities = self.stage_extract_entities(chapter_id, chapter_content)
        if entities is None:
            print("\n Pipeline paused. Generate entities output and re-run.")
            return

        # Stage 2: Map to VSM
        mappings = self.stage_map_to_vsm(chapter_id, entities)
        if mappings is None:
            print("\n Pipeline paused. Generate mappings output and re-run.")
            return

        # Stage 3: Synthesize analysis
        analysis = self.stage_synthesize_analysis(
            chapter_id, chapter_content, entities, mappings
        )
        if analysis is None:
            print("\n Pipeline paused. Generate analysis output and re-run.")
            return

        print(f"\n Chapter {chapter_id} fully processed.")

        # Record dependency edges
        self._record_chapter_dependencies(chapter_id)

        # Git commit
        if auto_commit:
            self._git_commit_chapter(chapter_id)

    def _record_chapter_dependencies(self, chapter_id: str) -> None:
        """Record dependency edges for a processed chapter."""
        run_id = f"run-{chapter_id}"
        manifest = RunManifest.create(
            run_id=run_id,
            template_id="extract-entities",
            template_name="extract-entities",
            template_digest="",
        )

        # Source → Run
        source = self.artifact_repo.get_by_name(self.spaces["sources"], chapter_id)
        if source:
            manifest.add_dependency_edge(source.id, run_id, "requires")

        # Run → Outputs (space key → artifact-name suffix)
        output_suffixes = {
            "entities": "entities",
            "mappings": "mappings",
            "analyses": "analysis",
        }
        for output_type, suffix in output_suffixes.items():
            artifact = self.artifact_repo.get_by_name(
                self.spaces[output_type], f"{chapter_id}-{suffix}"
            )
            if artifact:
                manifest.add_dependency_edge(run_id, artifact.id, "generates")

        try:
            edges = self.graph_builder.persist_edges(manifest)
            print(f" Recorded {len(edges)} dependency edges.")
        except Exception as e:
            # Dependency bookkeeping is best-effort; the chapter output
            # itself has already been produced.
            print(f" Warning: Could not record dependencies: {e}")

    def _git_commit_chapter(self, chapter_id: str) -> None:
        """Commit chapter outputs to git.

        Failures (non-zero git exit status, or git not being runnable at all)
        are reported as a warning and do not abort the pipeline.
        """
        output_dir = self.example_dir / "output"
        commit_message = (
            f"infospace: process {chapter_id}\n\n"
            f"Extract entities, map to VSM, and synthesize analysis\n"
            f"for {chapter_id}."
        )
        try:
            subprocess.run(
                ["git", "add", str(output_dir)],
                cwd=str(self.example_dir),
                check=True,
                capture_output=True,
            )
            subprocess.run(
                ["git", "commit", "-m", commit_message],
                cwd=str(project_root),
                check=True,
                capture_output=True,
            )
            print(f" Git commit: infospace: process {chapter_id}")
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable git binary.
            print(f" Warning: Git commit skipped ({e})")

    # ── Listing ──────────────────────────────────────────────────────

    def list_chapters(self) -> None:
        """List all available chapters and their processing status."""
        sources_dir = self.example_dir / "artifacts" / "sources"
        output_root = self.example_dir / "output"
        chapters = sorted(f.stem for f in sources_dir.glob("*.md"))

        def status(subdir: str, filename: str) -> str:
            """Return "done" when the stage output file exists, else "-"."""
            return "done" if (output_root / subdir / filename).exists() else "-"

        print(f"Available chapters ({len(chapters)}):\n")
        print(f" {'Chapter':<30} {'Entities':<12} {'Mappings':<12} {'Analysis':<12}")
        print(f" {'-'*30} {'-'*12} {'-'*12} {'-'*12}")

        for ch in chapters:
            entities = status("entities", f"{ch}-entities.md")
            mappings = status("mappings", f"{ch}-mappings.md")
            analysis = status("analyses", f"{ch}-analysis.md")
            print(f" {ch:<30} {entities:<12} {mappings:<12} {analysis:<12}")

    # ── Statistics ───────────────────────────────────────────────────

    def show_stats(self) -> None:
        """Show dependency graph statistics."""
        print("\nDependency Statistics:")
        try:
            stats = self.query_service.get_dependency_stats()
            print(f" Nodes: {stats['total_nodes']}")
            print(f" Edges: {stats['total_edges']}")
            print(f" Root artifacts: {stats['root_count']}")
            print(f" Leaf artifacts: {stats['leaf_count']}")
            print(f" Has cycles: {stats['has_cycles']}")
        except Exception as e:
            print(f" (No data yet: {e})")


def main() -> None:
    """Parse the command line and run the selected pipeline mode."""
    parser = argparse.ArgumentParser(
        description="Process Wealth of Nations chapters through VSM analysis pipeline"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--chapter", type=str,
                       help="Process a single chapter (e.g., book-1-chapter-01)")
    group.add_argument("--book", type=int,
                       help="Process all chapters in a book (1-5)")
    group.add_argument("--all", action="store_true", help="Process all chapters")
    group.add_argument("--metrics", action="store_true", help="Assess metrics only")
    group.add_argument("--list", action="store_true", help="List available chapters")
    group.add_argument("--stats", action="store_true", help="Show dependency statistics")
    parser.add_argument("--no-commit", action="store_true", help="Skip git commits")
    parser.add_argument(
        "--provider",
        type=str,
        choices=["openrouter", "claude-code"],
        default=None,
        help="LLM provider for auto-generating outputs (omit for manual mode)",
    )
    parser.add_argument("--model", type=str, default=None,
                        help="Model name to pass to the LLM provider")

    args = parser.parse_args()

    # Build optional LLM adapter
    llm_adapter = None
    if args.provider:
        from markitect.llm import create_adapter

        llm_adapter = create_adapter(args.provider, model=args.model)
        print(f"LLM: {args.provider}" + (f" ({args.model})" if args.model else ""))

    example_dir = Path(__file__).parent
    processor = ChapterProcessor(example_dir, llm_adapter=llm_adapter)
    processor.setup()

    sources_dir = example_dir / "artifacts" / "sources"
    auto_commit = not args.no_commit

    if args.list:
        processor.list_chapters()
    elif args.stats:
        processor.show_stats()
    elif args.metrics:
        processor.assess_metrics()
    elif args.chapter is not None:
        processor.process_chapter(args.chapter, auto_commit=auto_commit)
    elif args.book is not None:
        # ``is not None`` so that ``--book 0`` reports "no chapters" instead
        # of silently doing nothing.
        chapters = sorted(
            f.stem for f in sources_dir.glob(f"book-{args.book}-chapter-*.md")
        )
        if not chapters:
            print(f"No chapters found for Book {args.book}")
            return
        print(f"Processing {len(chapters)} chapters from Book {args.book}\n")
        for ch in chapters:
            processor.process_chapter(ch, auto_commit=auto_commit)
            print()
    elif args.all:
        chapters = sorted(f.stem for f in sources_dir.glob("*.md"))
        print(f"Processing all {len(chapters)} chapters\n")
        for ch in chapters:
            processor.process_chapter(ch, auto_commit=auto_commit)
            print()
        # NOTE(review): the original file's indentation was lost, so it is
        # unclear whether this summary belonged to --all only or ran after
        # every mode. It is kept under --all, which avoids printing the
        # statistics twice for --stats. Confirm against the intended layout.
        processor.show_stats()


if __name__ == "__main__":
    main()