#!/usr/bin/env python3
"""
Infospace with History — Chapter Processing Pipeline

Processes chapters from Adam Smith's "The Wealth of Nations" through a
three-stage analysis pipeline, mapping economic content to Stafford Beer's
Viable System Model.

Pipeline per chapter:
  1. extract-entities    — Extract economic entities from chapter text
  2. map-to-vsm          — Map entities to VSM concepts
  3. synthesize-analysis — Produce chapter-level VSM analysis

After all chapters:
  4. assess-metrics      — Evaluate completeness and consistency

Usage:
    # Process a single chapter
    python process_chapters.py --chapter book-1-chapter-01

    # Process all chapters in Book I
    python process_chapters.py --book 1

    # Process all chapters
    python process_chapters.py --all

    # Assess metrics only (after chapters have been processed)
    python process_chapters.py --metrics

    # List available chapters
    python process_chapters.py --list
"""

import argparse
import re
import subprocess
import sys
import time
from datetime import date
from pathlib import Path
from typing import Optional

# Add project root to path so the ``markitect`` package below is importable
# when this script is run directly from the example directory.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
from markitect.prompts.services.artifact_service import ArtifactService
from markitect.prompts.templates.models import PromptTemplate
from markitect.prompts.templates.analyzer import TemplateAnalyzer
from markitect.prompts.resolver.resolver import PromptResolver
from markitect.prompts.resolver.compiler import ContextCompiler
from markitect.prompts.resolver.strategy import ResolutionConfig, MultiSpaceResolutionStrategy
from markitect.prompts.execution.manifest import RunManifest
from markitect.prompts.dependencies.graph import GraphBuilder
from markitect.prompts.traceability.service import TraceabilityService
from markitect.prompts.queries.operations import PromptQueryService


class ChapterProcessor:
    """Processes Wealth of Nations chapters through the VSM analysis pipeline.

    The processor keeps artifacts in a SQLite-backed repository (one
    "information space" per artifact family, see ``self.spaces``) and mirrors
    prompts and generated outputs as Markdown files under
    ``<example_dir>/output``.  When no LLM adapter is supplied the pipeline
    runs in "manual mode": it writes the compiled prompt, then pauses until
    the expected output file appears on disk.
    """

    # Rate-limit retry policy used by ``_call_llm``: the wait grows linearly
    # (step, 2*step, 3*step seconds) over at most ``_RATE_LIMIT_RETRIES``
    # retries.
    _RATE_LIMIT_RETRIES = 3
    _RATE_LIMIT_WAIT_STEP = 15

    def __init__(
        self,
        example_dir: Path,
        db_path: Optional[str] = None,
        llm_adapter=None,
    ):
        """Wire up repositories and services for one example directory.

        Args:
            example_dir: Directory holding ``templates/``, ``artifacts/`` and
                ``output/``.
            db_path: SQLite database path; defaults to
                ``<example_dir>/infospace.db``.
            llm_adapter: Optional adapter exposing ``execute_prompt``.  When
                ``None`` no LLM calls are made (manual mode).
        """
        self.example_dir = example_dir
        self.db_path = db_path or str(example_dir / "infospace.db")
        self.llm_adapter = llm_adapter

        # Initialize repositories
        self.artifact_repo = SQLiteArtifactRepository(self.db_path)
        self.dep_repo = SQLiteDependencyRepository(self.db_path)
        self.artifact_service = ArtifactService(self.artifact_repo)
        self.graph_builder = GraphBuilder(self.dep_repo)
        self.trace_service = TraceabilityService(
            self.artifact_repo, self.dep_repo, db_path=self.db_path
        )
        self.query_service = PromptQueryService(
            self.artifact_repo, self.dep_repo, db_path=self.db_path
        )

        # Template analysis and compilation
        self.analyzer = TemplateAnalyzer()
        self.compiler = ContextCompiler()

        # Information spaces: short key -> space id used in the repository
        self.spaces = {
            "templates": "infospace-templates",
            "sources": "infospace-sources",
            "guidelines": "infospace-guidelines",
            "vsm-reference": "infospace-vsm-reference",
            "entities": "infospace-entities",
            "mappings": "infospace-mappings",
            "analyses": "infospace-analyses",
            "metrics": "infospace-metrics",
        }

    # ── Artifact Management ──────────────────────────────────────────

    def load_or_create_artifact(
        self,
        space: str,
        filepath: Path,
        artifact_type: ArtifactType,
        name: Optional[str] = None,
    ) -> Artifact:
        """Return the artifact called *name* in *space*, creating it if absent.

        Args:
            space: Space id to look in / create in.
            filepath: File whose text becomes the content of a new artifact.
            artifact_type: Type assigned to a newly created artifact.
            name: Artifact name; defaults to the file's stem.

        Returns:
            The pre-existing artifact (its content is NOT refreshed from the
            file) or the newly created one.
        """
        if name is None:
            name = filepath.stem

        existing = self.artifact_repo.get_by_name(space, name)
        if existing:
            return existing

        # Only touch the file when a new artifact is actually needed.
        artifact = Artifact.create(
            space_id=space,
            name=name,
            content=filepath.read_text(),
            artifact_type=artifact_type,
        )
        artifact = self.artifact_repo.create(artifact)
        print(f" + {name} ({artifact.content_digest[:8]})")
        return artifact

    def _replace_artifact(
        self, space: str, name: str, content: str, artifact_type: ArtifactType
    ) -> Artifact:
        """Delete any artifact called *name* in *space*, then create a fresh one."""
        existing = self.artifact_repo.get_by_name(space, name)
        if existing:
            self.artifact_repo.delete(existing.id)
        artifact = Artifact.create(
            space_id=space, name=name, content=content, artifact_type=artifact_type
        )
        return self.artifact_repo.create(artifact)

    def store_output_artifact(
        self, space: str, name: str, content: str, artifact_type: ArtifactType
    ) -> Artifact:
        """Store a generated output artifact, replacing it if it already exists."""
        return self._replace_artifact(space, name, content, artifact_type)

    def bind_macro_artifact(self, space: str, macro_name: str, content: str) -> Artifact:
        """Bind content to a macro name in a space (for template resolution).

        The binding is a ``CONTENT`` artifact named *macro_name*; any previous
        binding with that name in *space* is replaced.
        """
        return self._replace_artifact(space, macro_name, content, ArtifactType.CONTENT)

    # ── Setup ────────────────────────────────────────────────────────

    def setup(self):
        """Load all static artifacts (templates, guidelines, VSM reference)."""
        print("Loading artifacts...")

        # Templates
        for tmpl_file in (self.example_dir / "templates").glob("*.md"):
            self.load_or_create_artifact(
                self.spaces["templates"], tmpl_file, ArtifactType.TEMPLATE
            )

        # VSM reference
        # NOTE(review): every file is registered under the same name, so only
        # the first file seen is stored; later ones resolve to the existing
        # artifact.  Presumably the directory holds a single file — confirm.
        for ref_file in (self.example_dir / "artifacts" / "vsm-reference").glob("*.md"):
            self.load_or_create_artifact(
                self.spaces["vsm-reference"],
                ref_file,
                ArtifactType.CONTENT,
                name="vsm_framework",
            )

        # Guidelines: map file names to the macro names used by templates;
        # unmapped files fall back to their stem.
        guideline_name_map = {
            "extraction-rules.md": "extraction_rules",
            "mapping-rules.md": "mapping_rules",
        }
        for guide_file in (self.example_dir / "artifacts" / "guidelines").glob("*.md"):
            name = guideline_name_map.get(guide_file.name, guide_file.stem)
            self.load_or_create_artifact(
                self.spaces["guidelines"], guide_file, ArtifactType.CONTENT, name=name
            )

        print(" Done.\n")

    # ── Template Resolution ──────────────────────────────────────────

    def resolve_and_compile(
        self, template_name: str, extra_spaces: list[str]
    ) -> Optional[str]:
        """Resolve macros and compile a template into a final prompt string.

        Uses TemplateAnalyzer to parse @{target} macros from the template,
        the resolver to look up artifact content, and ContextCompiler to
        assemble the final prompt.

        Args:
            template_name: Name of the template artifact in the templates space.
            extra_spaces: Keys of ``self.spaces`` to include during resolution.

        Returns:
            The compiled prompt, or ``None`` (after printing an error) when
            the template is missing or resolution fails.
        """
        template_artifact = self.artifact_repo.get_by_name(
            self.spaces["templates"], template_name
        )
        if not template_artifact:
            print(f" ERROR: Template '{template_name}' not found")
            return None

        template = PromptTemplate.from_artifact(template_artifact)
        template_content = template_artifact.content

        # Analyze template to extract @{target} macros
        self.analyzer.analyze(template, template_content)

        config = ResolutionConfig(
            space_id=self.spaces["templates"],
            included_spaces=[self.spaces[s] for s in extra_spaces],
        )
        strategy = MultiSpaceResolutionStrategy()
        resolver = PromptResolver(self.artifact_service, strategy)
        result = resolver.resolve_template(template, config)

        if not result.success:
            print(f" ERROR: Resolution failed: {result.context.errors}")
            return None

        # Compile template with resolved content
        compiled = self.compiler.compile(template, template_content, result)
        return compiled.content

    # ── LLM Execution Helpers ─────────────────────────────────────────

    def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
        """Call the LLM and return the content string, or ``None`` on failure.

        Retries up to 3 times on rate-limit errors, waiting 15, 30 and then
        45 seconds (linear backoff).  Any other exception aborts immediately.
        Empty or whitespace-only responses are treated as failures.

        Does **not** write any files — callers decide where to persist.
        """
        from markitect.prompts.execution.models import RunConfig
        from markitect.llm.exceptions import LLMRateLimitError

        print(f" Calling LLM ({stage_label})...")
        started = time.time()
        max_retries = self._RATE_LIMIT_RETRIES

        for attempt in range(max_retries + 1):
            try:
                response = self.llm_adapter.execute_prompt(
                    prompt, RunConfig(max_tokens=max_tokens)
                )
                break  # success
            except LLMRateLimitError as exc:
                if attempt >= max_retries:
                    print(
                        f" LLM rate limit after {max_retries} retries "
                        f"({time.time() - started:.1f}s): {exc}"
                    )
                    return None
                wait = self._RATE_LIMIT_WAIT_STEP * (attempt + 1)  # 15, 30, 45 seconds
                print(
                    f" Rate limited, retrying in {wait}s "
                    f"(attempt {attempt + 1}/{max_retries})..."
                )
                time.sleep(wait)
            except Exception as exc:
                # Best-effort boundary: report and let the pipeline pause
                # instead of crashing a multi-chapter run.
                print(f" LLM error ({time.time() - started:.1f}s): {exc}")
                return None

        elapsed = time.time() - started
        # Tolerate adapters that report no usage information.
        usage = response.usage or {}
        print(
            f" LLM done in {elapsed:.1f}s — "
            f"prompt {usage.get('prompt_tokens', '?')} tok, "
            f"completion {usage.get('completion_tokens', '?')} tok, "
            f"total {usage.get('total_tokens', '?')} tok"
        )

        content = response.content
        if not content or not content.strip():
            print(" LLM returned empty content")
            return None
        return content

    def _execute_llm(self, prompt: str, output_file: Path, stage_label: str,
                     max_tokens: int = 8192) -> Optional[str]:
        """Call the LLM, write the result to *output_file*, and return it.

        Nothing is written when the call fails; ``None`` is returned instead.
        """
        content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
        if content:
            output_file.parent.mkdir(parents=True, exist_ok=True)
            output_file.write_text(content)
            print(f" LLM output written to {output_file.name}")
        return content

    def _write_prompt_file(self, prompt_file: Path, prompt: str) -> None:
        """Write a compiled prompt for inspection, creating its directory if needed."""
        prompt_file.parent.mkdir(parents=True, exist_ok=True)
        prompt_file.write_text(prompt)
        print(f" Prompt written to {prompt_file.relative_to(self.example_dir)}")

    def _load_or_generate_output(
        self,
        *,
        prompt: str,
        prompt_file: Path,
        output_file: Path,
        space_key: str,
        artifact_name: str,
        stage_label: str,
    ) -> Optional[str]:
        """Shared tail of the single-file stages (mappings, analysis, metrics).

        Writes the prompt, then returns (and stores as a ``GENERATED``
        artifact) the first available of: the output already on disk, or a
        fresh LLM result.  Returns ``None`` when neither is available, i.e.
        the pipeline is waiting for a manually produced output file.
        """
        self._write_prompt_file(prompt_file, prompt)

        if output_file.exists():
            content = output_file.read_text()
            self.store_output_artifact(
                self.spaces[space_key], artifact_name, content, ArtifactType.GENERATED
            )
            print(f" Found existing output: {output_file.name}")
            return content

        if self.llm_adapter and prompt:
            content = self._execute_llm(prompt, output_file, stage_label)
            if content:
                self.store_output_artifact(
                    self.spaces[space_key], artifact_name, content, ArtifactType.GENERATED
                )
                return content

        print(f" Awaiting output at: {output_file.relative_to(self.example_dir)}")
        return None

    # ── Entity Management (flat canonical set) ─────────────────────

    @staticmethod
    def _normalize_entity_name(name: str) -> str:
        """Normalize an entity name to a kebab-case filename stem.

        Lower-cases, turns underscores and spaces into hyphens, drops every
        character outside ``[a-z0-9-]``, collapses hyphen runs and trims
        leading/trailing hyphens.  May return an empty string.
        """
        slug = name.lower().strip()
        slug = slug.replace("_", "-").replace(" ", "-")
        slug = re.sub(r"[^a-z0-9-]", "", slug)
        slug = re.sub(r"-{2,}", "-", slug)
        return slug.strip("-")

    def _entities_dir(self) -> Path:
        """Directory holding canonical entity files and per-chapter views."""
        return self.example_dir / "output" / "entities"

    def _archive_dir(self) -> Path:
        """Directory holding archived (retired) entity files."""
        return self._entities_dir() / "archive"

    def _list_existing_entity_names(self) -> list[str]:
        """Return sorted slugs of all canonical entity files already on disk.

        Chapter views (``*-entities.md``) and saved prompts (``*-prompt.md``)
        live in the same directory and are excluded.
        """
        return sorted(
            f.stem
            for f in self._entities_dir().glob("*.md")
            if not f.name.endswith(("-entities.md", "-prompt.md"))
        )

    def archive_entity(self, slug: str, reason: str) -> None:
        """Move a canonical entity to the archive with a documented reason.

        The entity file is prepended with an archive header (an HTML comment
        carrying today's date and *reason*), then moved to
        ``output/entities/archive/<slug>.md``.

        Chapter views that reference this entity are **not** updated
        automatically — they are only reported so they can be reviewed and
        updated manually.
        """
        src = self._entities_dir() / f"{slug}.md"
        if not src.exists():
            print(f" Entity not found: {slug}")
            return

        archive = self._archive_dir()
        archive.mkdir(parents=True, exist_ok=True)
        dest = archive / f"{slug}.md"

        # Keep the reason from terminating the HTML comment early.
        safe_reason = reason.replace("-->", "--&gt;")
        header = f"<!-- Archived {date.today().isoformat()}: {safe_reason} -->\n\n"
        dest.write_text(header + src.read_text())
        src.unlink()

        # Report which chapter views still reference this entity
        needle = f'include "{slug}.md"'
        refs = [
            view.name
            for view in self._entities_dir().glob("*-entities.md")
            if needle in view.read_text()
        ]

        print(f" Archived: {slug}.md -> archive/{slug}.md")
        print(f" Reason: {reason}")
        if refs:
            print(f" Referenced by: {', '.join(refs)} (update these views)")
        print(f" Canonical set: {len(self._list_existing_entity_names())} entities")

    def _split_entities(
        self, combined_content: str
    ) -> list[tuple[str, Path]]:
        """Split combined LLM output into the flat canonical entity directory.

        Sections are delimited by lines of the form ``--- ENTITY: name ---``.
        Each entity is written to ``output/entities/<slug>.md``.  If a file
        with that slug already exists it is **skipped** (first-occurrence
        wins), but the entity is still included in the returned list so the
        chapter view can reference it.  Names that normalize to an empty slug
        are dropped.

        Returns list of (entity_name, file_path) for every entity in
        *combined_content* (new and pre-existing alike).
        """
        entities_dir = self._entities_dir()
        entities_dir.mkdir(parents=True, exist_ok=True)

        # With one capture group, re.split yields
        # [preamble, name1, body1, name2, body2, ...].
        parts = re.split(
            r"^---\s*ENTITY:\s*(.+?)\s*---\s*$",
            combined_content,
            flags=re.MULTILINE,
        )

        entity_files: list[tuple[str, Path]] = []
        new_count = 0
        skipped_count = 0

        for entity_name, raw_body in zip(parts[1::2], parts[2::2]):
            slug = self._normalize_entity_name(entity_name)
            if not slug:
                continue

            file_path = entities_dir / f"{slug}.md"
            if file_path.exists():
                skipped_count += 1
            else:
                file_path.write_text(raw_body.strip() + "\n")
                new_count += 1
            entity_files.append((entity_name, file_path))

        msg = f" {new_count} new entities written"
        if skipped_count:
            msg += f", {skipped_count} pre-existing (skipped)"
        print(msg)
        return entity_files

    @staticmethod
    def _parse_chapter_id(chapter_id: str) -> tuple[int, int]:
        """Extract (book, chapter) numbers from ``book-N-chapter-M``.

        Missing or non-numeric components fall back to book 1 / chapter 0
        rather than raising, so unconventional ids still get a view file.
        """
        parts = chapter_id.split("-")

        def component(index: int, default: int) -> int:
            try:
                return int(parts[index])
            except (IndexError, ValueError):
                return default

        return component(1, 1), component(3, 0)

    def _write_chapter_entity_view(
        self, chapter_id: str, entity_files: list[tuple[str, Path]]
    ) -> Path:
        """Write a per-chapter view file that transcludes individual entities.

        The view is ``output/entities/<chapter_id>-entities.md``: a title
        followed by one ``{{ include "<file>" }}`` directive per entity,
        separated by ``---`` rules.

        Returns:
            Path of the written view file.
        """
        book_num, ch_num = self._parse_chapter_id(chapter_id)
        roman = {1: "I", 2: "II", 3: "III", 4: "IV", 5: "V"}.get(book_num, str(book_num))
        title = f"# Economic Entities — Book {roman}, Chapter {ch_num}\n"

        lines = [title]
        for _name, file_path in entity_files:
            lines.extend([f'{{{{ include "{file_path.name}" }}}}', "", "---", ""])

        # Remove trailing separator after last entity
        if len(lines) >= 3 and lines[-1] == "" and lines[-2] == "---":
            lines = lines[:-2]

        view_path = self._entities_dir() / f"{chapter_id}-entities.md"
        view_path.write_text("\n".join(lines) + "\n")
        print(f" Chapter view written to {view_path.name}")
        return view_path

    def _read_entities_from_view(
        self, chapter_id: str
    ) -> tuple[str, list[tuple[str, Path]]]:
        """Reconstruct combined entity content from a chapter view file.

        Parses ``{{ include "..." }}`` directives in the view to discover
        which canonical entity files belong to this chapter, reads them, and
        rebuilds the delimited combined content needed by downstream stages.
        Includes whose target file is missing are silently skipped.

        Returns:
            ``(combined, entity_files)`` where *combined* is the
            ``--- ENTITY: slug ---`` delimited text (empty string when no
            entity could be read) and *entity_files* lists (slug, path).
        """
        from markitect.packaging.transclusion.directives import DirectiveParser

        entities_dir = self._entities_dir()
        view_content = (entities_dir / f"{chapter_id}-entities.md").read_text()
        includes = DirectiveParser.extract_file_includes(view_content)

        entity_files: list[tuple[str, Path]] = []
        sections: list[str] = []
        for rel_path in includes:
            file_path = entities_dir / rel_path
            if not file_path.exists():
                continue
            slug = file_path.stem
            body = file_path.read_text().strip()
            sections.append(f"--- ENTITY: {slug} ---\n\n{body}")
            entity_files.append((slug, file_path))

        combined = "\n\n".join(sections) + "\n" if sections else ""
        return combined, entity_files

    def _store_chapter_entities(self, chapter_id: str, content: str) -> None:
        """Store the chapter's combined entity text as a ``GENERATED`` artifact."""
        self.store_output_artifact(
            self.spaces["entities"],
            f"{chapter_id}-entities",
            content,
            ArtifactType.GENERATED,
        )

    # ── Pipeline Stages ──────────────────────────────────────────────

    def stage_extract_entities(self, chapter_id: str, chapter_content: str) -> Optional[str]:
        """Stage 1: Extract economic entities from a chapter.

        Canonical entity files live in a **flat** directory
        (``output/entities/<slug>.md``).  Duplicates across chapters are
        skipped — first occurrence wins.

        The per-chapter view file (``<chapter_id>-entities.md``) is a
        **secondary** transclusion view that ``{{ include }}``s each entity
        relevant to the chapter.

        Sources are tried in this order: an existing transclusion view, a
        per-chapter subdirectory (older layout, migrated in place), a legacy
        combined file (migrated in place), and finally the LLM.

        Returns:
            The combined entity text, or ``None`` when the prompt could not
            be compiled or no entities are available yet.
        """
        print(" [1/3] Extracting entities...")

        # Bind the chapter content to the macro name
        self.bind_macro_artifact(self.spaces["sources"], "chapter_text", chapter_content)

        # Bind existing entity list so the LLM knows what already exists
        existing = self._list_existing_entity_names()
        if existing:
            entity_list = "\n".join(f"- {name}" for name in existing)
        else:
            entity_list = "(none — this is the first chapter)"
        self.bind_macro_artifact(
            self.spaces["entities"], "existing_entities", entity_list
        )

        prompt = self.resolve_and_compile(
            "extract-entities",
            ["sources", "guidelines", "vsm-reference", "entities"],
        )
        if not prompt:
            return None

        # Write compiled prompt for inspection
        entities_dir = self._entities_dir()
        self._write_prompt_file(entities_dir / f"{chapter_id}-prompt.md", prompt)

        view_file = entities_dir / f"{chapter_id}-entities.md"

        # ── PRIMARY: chapter view with transclusion already on disk ──
        if view_file.exists() and "{{ include" in view_file.read_text():
            content, entity_files = self._read_entities_from_view(chapter_id)
            self._store_chapter_entities(chapter_id, content)
            print(f" Found chapter view referencing {len(entity_files)} entities")
            return content

        # ── MIGRATION: per-chapter subdirectory (previous format) ──
        subdir = entities_dir / chapter_id
        if subdir.is_dir() and any(subdir.glob("*.md")):
            print(f" Migrating per-chapter subdir: {chapter_id}/")
            migrated: list[tuple[str, Path]] = []
            for src in sorted(subdir.glob("*.md")):
                dest = entities_dir / src.name
                # First occurrence wins: an existing canonical file is kept
                # and the subdir copy is left where it is.
                if not dest.exists():
                    src.rename(dest)
                migrated.append((src.stem, dest))
            # Clean up empty subdir
            if not any(subdir.glob("*")):
                subdir.rmdir()
            self._write_chapter_entity_view(chapter_id, migrated)
            content = self._read_entities_from_view(chapter_id)[0]
            self._store_chapter_entities(chapter_id, content)
            return content

        # ── MIGRATION: legacy combined file (pre-split format) ──
        if view_file.exists():
            raw = view_file.read_text()
            if "--- ENTITY:" in raw:
                print(f" Migrating legacy combined file: {view_file.name}")
                entity_files = self._split_entities(raw)
                self._write_chapter_entity_view(chapter_id, entity_files)
                self._store_chapter_entities(chapter_id, raw)
                return raw

        # ── GENERATE: call LLM, persist individual files first ──
        if self.llm_adapter and prompt:
            combined = self._call_llm(prompt, "entities")
            if combined:
                entity_files = self._split_entities(combined)
                self._write_chapter_entity_view(chapter_id, entity_files)
                self._store_chapter_entities(chapter_id, combined)
                return combined

        print(" Awaiting entity files in: output/entities/")
        return None

    def stage_map_to_vsm(self, chapter_id: str, entities_content: str) -> Optional[str]:
        """Stage 2: Map extracted entities to VSM concepts.

        Returns the mappings text (existing file or fresh LLM output), or
        ``None`` when the prompt could not be compiled or the output is still
        awaited.
        """
        print(" [2/3] Mapping to VSM...")

        self.bind_macro_artifact(self.spaces["entities"], "entities", entities_content)

        prompt = self.resolve_and_compile(
            "map-to-vsm", ["entities", "vsm-reference", "guidelines"]
        )
        if not prompt:
            return None

        mappings_dir = self.example_dir / "output" / "mappings"
        return self._load_or_generate_output(
            prompt=prompt,
            prompt_file=mappings_dir / f"{chapter_id}-prompt.md",
            output_file=mappings_dir / f"{chapter_id}-mappings.md",
            space_key="mappings",
            artifact_name=f"{chapter_id}-mappings",
            stage_label="mappings",
        )

    def stage_synthesize_analysis(
        self, chapter_id: str, chapter_content: str,
        entities_content: str, mappings_content: str
    ) -> Optional[str]:
        """Stage 3: Synthesize chapter-level VSM analysis.

        Returns the analysis text (existing file or fresh LLM output), or
        ``None`` when the prompt could not be compiled or the output is still
        awaited.
        """
        print(" [3/3] Synthesizing analysis...")

        self.bind_macro_artifact(self.spaces["sources"], "chapter_text", chapter_content)
        self.bind_macro_artifact(self.spaces["entities"], "entities", entities_content)
        self.bind_macro_artifact(self.spaces["mappings"], "mappings", mappings_content)

        prompt = self.resolve_and_compile(
            "synthesize-analysis",
            ["sources", "entities", "mappings", "vsm-reference"],
        )
        if not prompt:
            return None

        analyses_dir = self.example_dir / "output" / "analyses"
        return self._load_or_generate_output(
            prompt=prompt,
            prompt_file=analyses_dir / f"{chapter_id}-prompt.md",
            output_file=analyses_dir / f"{chapter_id}-analysis.md",
            space_key="analyses",
            artifact_name=f"{chapter_id}-analysis",
            stage_label="analysis",
        )

    # ── Metrics ──────────────────────────────────────────────────────

    def assess_metrics(self) -> Optional[str]:
        """Run the assess-metrics template across all completed analyses.

        Returns the metrics report text, or ``None`` when there are no
        analyses, the prompt could not be compiled, or the report is still
        awaited.
        """
        print("Assessing metrics...")

        analyses_dir = self.example_dir / "output" / "analyses"
        analysis_files = sorted(analyses_dir.glob("*-analysis.md"))

        if not analysis_files:
            print(" No completed analyses found. Process chapters first.")
            return None

        # Concatenate all analyses
        combined = "\n\n---\n\n".join(
            f"\n{analysis_file.read_text()}" for analysis_file in analysis_files
        )
        self.bind_macro_artifact(self.spaces["analyses"], "all_analyses", combined)

        prompt = self.resolve_and_compile(
            "assess-metrics", ["analyses", "vsm-reference"]
        )
        if not prompt:
            return None

        metrics_dir = self.example_dir / "output" / "metrics"
        return self._load_or_generate_output(
            prompt=prompt,
            prompt_file=metrics_dir / "metrics-prompt.md",
            output_file=metrics_dir / "metrics-report.md",
            space_key="metrics",
            artifact_name="metrics-report",
            stage_label="metrics",
        )

    # ── Chapter Processing ───────────────────────────────────────────

    def process_chapter(self, chapter_id: str, auto_commit: bool = True):
        """Run the full pipeline for a single chapter.

        Stops ("pauses") at the first stage that yields no output so the
        missing file can be produced and the command re-run.  On full success
        the dependency edges are recorded and, if *auto_commit* is true, the
        outputs are committed to git.
        """
        source_file = self.example_dir / "artifacts" / "sources" / f"{chapter_id}.md"
        if not source_file.exists():
            print(f"ERROR: Source file not found: {source_file}")
            return

        print(f"Processing: {chapter_id}")
        print("=" * 60)

        chapter_content = source_file.read_text()

        # Store source artifact
        self.load_or_create_artifact(
            self.spaces["sources"], source_file, ArtifactType.CONTENT
        )

        # Stage 1: Extract entities
        entities = self.stage_extract_entities(chapter_id, chapter_content)
        if entities is None:
            print("\n Pipeline paused. Generate entities output and re-run.")
            return

        # Stage 2: Map to VSM
        mappings = self.stage_map_to_vsm(chapter_id, entities)
        if mappings is None:
            print("\n Pipeline paused. Generate mappings output and re-run.")
            return

        # Stage 3: Synthesize analysis
        analysis = self.stage_synthesize_analysis(
            chapter_id, chapter_content, entities, mappings
        )
        if analysis is None:
            print("\n Pipeline paused. Generate analysis output and re-run.")
            return

        print(f"\n Chapter {chapter_id} fully processed.")

        # Record dependency edges
        self._record_chapter_dependencies(chapter_id)

        # Git commit
        if auto_commit:
            self._git_commit_chapter(chapter_id)

    def _record_chapter_dependencies(self, chapter_id: str):
        """Record dependency edges for a processed chapter.

        Adds a ``requires`` edge from the source artifact to a synthetic run
        id and a ``generates`` edge from the run to each stage output that
        exists.  Persistence failures are reported as warnings only.
        """
        run_id = f"run-{chapter_id}"
        manifest = RunManifest.create(
            run_id=run_id,
            template_id="extract-entities",
            template_name="extract-entities",
            template_digest="",
        )

        # Source → Run
        source = self.artifact_repo.get_by_name(self.spaces["sources"], chapter_id)
        if source:
            manifest.add_dependency_edge(source.id, run_id, "requires")

        # Run → Outputs (space key -> artifact-name suffix)
        output_suffixes = {
            "entities": "entities",
            "mappings": "mappings",
            "analyses": "analysis",
        }
        for space_key, suffix in output_suffixes.items():
            artifact = self.artifact_repo.get_by_name(
                self.spaces[space_key], f"{chapter_id}-{suffix}"
            )
            if artifact:
                manifest.add_dependency_edge(run_id, artifact.id, "generates")

        try:
            edges = self.graph_builder.persist_edges(manifest)
            print(f" Recorded {len(edges)} dependency edges.")
        except Exception as e:
            # Dependency bookkeeping is best-effort; never fail the chapter.
            print(f" Warning: Could not record dependencies: {e}")

    def _git_commit_chapter(self, chapter_id: str):
        """Commit chapter outputs to git.

        Stages ``<example_dir>/output`` and commits it.  Both commands run
        from the example directory so they always address the same
        repository.  Any git failure (nothing to commit, not a repository,
        git not installed) is reported as a warning and otherwise ignored.
        """
        output_dir = self.example_dir / "output"
        repo_cwd = str(self.example_dir)
        message = (
            f"infospace: process {chapter_id}\n\n"
            f"Extract entities, map to VSM, and synthesize analysis\n"
            f"for {chapter_id}."
        )
        try:
            subprocess.run(
                ["git", "add", str(output_dir)],
                cwd=repo_cwd,
                check=True,
                capture_output=True,
            )
            subprocess.run(
                ["git", "commit", "-m", message],
                cwd=repo_cwd,
                check=True,
                capture_output=True,
            )
            print(f" Git commit: infospace: process {chapter_id}")
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing ``git`` executable.
            print(f" Warning: Git commit skipped ({e})")

    # ── Listing ──────────────────────────────────────────────────────

    def list_chapters(self):
        """List all available chapters and their processing status."""
        sources_dir = self.example_dir / "artifacts" / "sources"
        chapters = sorted(f.stem for f in sources_dir.glob("*.md"))
        output_dir = self.example_dir / "output"

        print(f"Available chapters ({len(chapters)}):\n")
        print(f" {'Chapter':<30} {'Entities':<12} {'Mappings':<12} {'Analysis':<12}")
        print(f" {'-'*30} {'-'*12} {'-'*12} {'-'*12}")

        for ch in chapters:
            view_file = self._entities_dir() / f"{ch}-entities.md"
            entity_count = 0
            if view_file.exists():
                view_text = view_file.read_text()
                if "{{ include" in view_text:
                    from markitect.packaging.transclusion.directives import DirectiveParser
                    entity_count = len(DirectiveParser.extract_file_includes(view_text))
            entities = f"done ({entity_count})" if entity_count else "-"
            mappings = "done" if (output_dir / "mappings" / f"{ch}-mappings.md").exists() else "-"
            analysis = "done" if (output_dir / "analyses" / f"{ch}-analysis.md").exists() else "-"
            print(f" {ch:<30} {entities:<12} {mappings:<12} {analysis:<12}")

        total_entities = len(self._list_existing_entity_names())
        if total_entities:
            print(f"\n Canonical entity set: {total_entities} unique entities")

        archive = self._archive_dir()
        if archive.exists():
            archived = len(list(archive.glob("*.md")))
            if archived:
                print(f" Archived entities: {archived}")

    # ── Statistics ───────────────────────────────────────────────────

    def show_stats(self):
        """Show dependency graph statistics (or a note when none are available)."""
        print("\nDependency Statistics:")
        try:
            stats = self.query_service.get_dependency_stats()
            print(f" Nodes: {stats['total_nodes']}")
            print(f" Edges: {stats['total_edges']}")
            print(f" Root artifacts: {stats['root_count']}")
            print(f" Leaf artifacts: {stats['leaf_count']}")
            print(f" Has cycles: {stats['has_cycles']}")
        except Exception as e:
            # Statistics are informational; an empty or missing graph must
            # not abort the command.
            print(f" (No data yet: {e})")


# ── Infospace tooling integration ─────────────────────────────────


def _load_infospace(example_dir: Path):
    """Load infospace config and entities from the example directory.

    Exits the process with status 1 when ``infospace.yaml`` is missing.

    Returns:
        ``(config, config_path, entities)``; *entities* is an empty list when
        the configured entities directory does not exist.
    """
    from markitect.infospace.config import load_infospace_config
    from markitect.infospace.entity_parser import parse_entity_directory

    config_path = example_dir / "infospace.yaml"
    if not config_path.is_file():
        print("Error: No infospace.yaml found. Create one first.")
        sys.exit(1)

    config = load_infospace_config(config_path)
    entities_dir = example_dir / config.entities_dir
    entities = parse_entity_directory(entities_dir) if entities_dir.is_dir() else []
    return config, config_path, entities


def _run_infospace_status(example_dir: Path):
    """Show infospace status using the tooling layer."""
    from markitect.infospace.state import build_state

    config, _config_path, entities = _load_infospace(example_dir)
    state = build_state(config, entities=entities)

    print(f"Infospace: {state.topic_name}")
    print(f"Domain: {config.topic.domain}")
    print(f"Entities: {state.entity_count}")
    if state.domains:
        print(f"Domains: {', '.join(state.domains)}")
    if config.disciplines:
        names = [d.name for d in config.disciplines]
        print(f"Disciplines: {', '.join(names)}")

    # Show processing progress: a chapter counts as processed once its
    # analysis file exists.
    sources_dir = example_dir / "artifacts" / "sources"
    total_chapters = len(list(sources_dir.glob("*.md")))
    processed = len(list((example_dir / "output" / "analyses").glob("*-analysis.md")))
    print(f"Chapters: {processed}/{total_chapters} processed")


def _run_infospace_check(example_dir: Path):
    """Run collection-level quality checks and record a metrics snapshot."""
    from markitect.infospace.checks import run_all_checks
    from markitect.infospace.history import record_check_results

    config, _config_path, entities = _load_infospace(example_dir)
    if not entities:
        print("No entities to check.")
        return

    print(f"Running collection checks on {len(entities)} entities...\n")
    report = run_all_checks(entities=entities)

    for concern_name, concern_data in report.to_dict().items():
        label = concern_data.get("concern", concern_name.upper())
        print(f" {label} — {concern_name}")
        for key, value in concern_data.items():
            if key == "concern":
                continue
            print(f" {key}: {value}")
        print()

    metrics = report.metrics()
    if metrics:
        print("Metrics summary:")
        for key, value in sorted(metrics.items()):
            print(f" {key}: {value:.4f}")

    snap = record_check_results(report, config, example_dir, entity_count=len(entities))
    print(f"\nRecorded snapshot {snap.snapshot_id}")


def _format_bounds(threshold) -> str:
    """Render a threshold's optional ``min`` / ``max`` as ``min=…, max=…``."""
    bounds = []
    if threshold.min is not None:
        bounds.append(f"min={threshold.min}")
    if threshold.max is not None:
        bounds.append(f"max={threshold.max}")
    return ", ".join(bounds)


def _run_infospace_viability(example_dir: Path):
    """Show viability dashboard.

    Without recorded metrics only the configured thresholds are listed.
    """
    from markitect.infospace.history import read_metrics_file
    from markitect.infospace.state import build_state

    config, _config_path, entities = _load_infospace(example_dir)

    if not config.viability:
        print("No viability thresholds configured.")
        return

    metrics = read_metrics_file(example_dir / config.metrics_dir / "metrics.yaml")
    if not metrics:
        print("No metrics available. Run --infospace-check first.")
        print("\nConfigured thresholds:")
        for name, threshold in config.viability.items():
            print(f" {name}: {_format_bounds(threshold)}")
        return

    state = build_state(config, entities=entities, metrics=metrics)

    print(f"{'Metric':<30} {'Value':>8} {'Threshold':>15} {'Status':>8}")
    print("-" * 63)
    for r in state.viability_results:
        status_str = "PASS" if r.passed else "FAIL"
        print(f"{r.metric:<30} {r.value:>8.4f} {_format_bounds(r.threshold):>15} {status_str:>8}")

    print()
    verdict = "YES" if state.is_viable else "NO"
    print(
        f"Viable: {verdict} "
        f"({state.viability_pass_count}/{state.viability_total_count} thresholds met)"
    )


def main():
    """Parse the command line and dispatch to the selected action.

    Exactly one action flag is required.  Every action except the
    ``--infospace-*`` ones (and ``--book`` when no chapters match) finishes
    by printing dependency statistics.
    """
    parser = argparse.ArgumentParser(
        description="Process Wealth of Nations chapters through VSM analysis pipeline"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--chapter", type=str, help="Process a single chapter (e.g., book-1-chapter-01)")
    group.add_argument("--book", type=int, help="Process all chapters in a book (1-5)")
    group.add_argument("--all", action="store_true", help="Process all chapters")
    group.add_argument("--metrics", action="store_true", help="Assess metrics only")
    group.add_argument("--list", action="store_true", help="List available chapters")
    group.add_argument("--stats", action="store_true", help="Show dependency statistics")
    group.add_argument("--archive-entity", type=str, metavar="SLUG",
                       help="Archive an entity (move to archive/ with reason)")
    group.add_argument("--infospace-status", action="store_true",
                       help="Show infospace status via infospace tooling")
    group.add_argument("--infospace-check", action="store_true",
                       help="Run collection-level quality checks (C1-C5)")
    group.add_argument("--infospace-viability", action="store_true",
                       help="Show viability dashboard")
    parser.add_argument("--reason", type=str, default=None,
                        help="Reason for archiving (used with --archive-entity)")
    parser.add_argument("--no-commit", action="store_true", help="Skip git commits")
    parser.add_argument(
        "--provider",
        type=str,
        choices=["openrouter", "claude-code", "gemini", "openai"],
        default=None,
        help="LLM provider for auto-generating outputs (omit for manual mode)",
    )
    parser.add_argument("--model", type=str, default=None,
                        help="Model name to pass to the LLM provider")

    args = parser.parse_args()

    # Reject an incomplete archive request before any database or artifact
    # setup happens.
    if args.archive_entity is not None and not args.reason:
        parser.error("--archive-entity requires --reason")

    # Build optional LLM adapter
    _PROVIDER_DEFAULTS = {
        "openrouter": "arcee-ai/trinity-large-preview:free",
    }
    llm_adapter = None
    if args.provider:
        from markitect.llm import create_adapter
        model = args.model or _PROVIDER_DEFAULTS.get(args.provider)
        llm_adapter = create_adapter(args.provider, model=model)
        print(f"LLM: {args.provider} ({model or 'default'})")

    example_dir = Path(__file__).parent
    processor = ChapterProcessor(example_dir, llm_adapter=llm_adapter)
    processor.setup()

    auto_commit = not args.no_commit
    sources_dir = example_dir / "artifacts" / "sources"

    if args.archive_entity is not None:
        processor.archive_entity(args.archive_entity, args.reason)

    elif args.list:
        processor.list_chapters()

    elif args.stats:
        # Statistics are printed once by the shared call at the end.
        pass

    elif args.metrics:
        processor.assess_metrics()

    elif args.chapter is not None:
        processor.process_chapter(args.chapter, auto_commit=auto_commit)

    elif args.book is not None:
        chapters = sorted(
            f.stem for f in sources_dir.glob(f"book-{args.book}-chapter-*.md")
        )
        if not chapters:
            print(f"No chapters found for Book {args.book}")
            return
        print(f"Processing {len(chapters)} chapters from Book {args.book}\n")
        for ch in chapters:
            processor.process_chapter(ch, auto_commit=auto_commit)
            print()

    elif args.all:
        chapters = sorted(f.stem for f in sources_dir.glob("*.md"))
        print(f"Processing all {len(chapters)} chapters\n")
        for ch in chapters:
            processor.process_chapter(ch, auto_commit=auto_commit)
            print()

    elif args.infospace_status:
        _run_infospace_status(example_dir)
        return

    elif args.infospace_check:
        _run_infospace_check(example_dir)
        return

    elif args.infospace_viability:
        _run_infospace_viability(example_dir)
        return

    processor.show_stats()


if __name__ == "__main__":
    main()