feat(llm): add OpenAI adapter, entity archive policy, process chapters 5-7

Add OpenAIAdapter for the OpenAI chat completions API; the key is read from apikey-chatgpt.txt or the OPENAI_API_KEY environment variable. For the infospace pipeline, set the default OpenRouter model to arcee-ai/trinity-large-preview:free and raise the default max_tokens from 4096 to 8192.

Reprocess chapter 05 with Trinity Large (the earlier Gemini run produced 1 truncated entity; this run produces 19 complete entities). Process chapter 06 (Aurora Alpha, 10 entities) and chapter 07 (Trinity Large, 15 entities, including a regenerated violent-policy.md). The canonical set now contains 85 unique entities.

Add an entity archive policy: entities are never silently deleted. Retired entities are moved to output/entities/archive/ with a dated header stating the reason. New CLI option: --archive-entity <slug> --reason "...". The --list output now shows the archive count alongside the canonical set.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:39:44 +01:00
parent 880c1d1374
commit 41773f1320
68 changed files with 6500 additions and 136 deletions
--- a/examples/infospace-with-history/process_chapters.py
+++ b/examples/infospace-with-history/process_chapters.py
@@ -228,7 +228,7 @@ class ChapterProcessor:

    # ── LLM Execution Helpers ─────────────────────────────────────────

-    def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
+    def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
        """Call the LLM and return the content string, or ``None`` on failure.

        Retries up to 3 times on rate-limit (429) errors with exponential backoff.
@@ -273,7 +273,7 @@ class ChapterProcessor:

        return content

-    def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
+    def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 8192) -> Optional[str]:
        """Call the LLM, write the result to *output_file*, and return it."""
        content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
        if content:
@@ -296,6 +296,9 @@ class ChapterProcessor:
    def _entities_dir(self) -> Path:
        return self.example_dir / "output" / "entities"

+    def _archive_dir(self) -> Path:
+        return self._entities_dir() / "archive"  # "archive" subfolder of the entities dir; archive_entity() moves files here
+
    def _list_existing_entity_names(self) -> list[str]:
        """Return sorted slugs of all canonical entity files already on disk."""
        return sorted(
@@ -305,6 +308,45 @@ class ChapterProcessor:
            and not f.name.endswith("-prompt.md")
        )

+    def archive_entity(self, slug: str, reason: str) -> None:
+        """Move a canonical entity to the archive with a documented reason.
+
+        A dated header recording *reason* is prepended and the file is moved
+        to ``output/entities/archive/<slug>.md``. Nothing happens (beyond a
+        printed notice) if the entity is missing or an archived copy already
+        exists, so earlier archives are never overwritten. Chapter views that
+        reference the entity are **not** updated, only reported.
+        """
+        src = self._entities_dir() / f"{slug}.md"
+        if not src.exists():
+            print(f"  Entity not found: {slug}")
+            return
+
+        archive = self._archive_dir()
+        archive.mkdir(parents=True, exist_ok=True)
+        dest = archive / f"{slug}.md"
+        if dest.exists():  # never clobber an earlier archived version
+            print(f"  Already archived: archive/{slug}.md (not overwritten)")
+            return
+
+        from datetime import date
+        # "-->" inside the reason would close the HTML comment prematurely.
+        safe_reason = reason.replace("-->", "-- >")
+        header = f"<!-- ARCHIVED {date.today().isoformat()}\n     Reason: {safe_reason}\n-->\n\n"
+        dest.write_text(header + src.read_text(encoding="utf-8"), encoding="utf-8")
+        src.unlink()  # remove the canonical copy only after the archive is written
+
+        # Report which chapter views still reference this entity
+        needle = f'include "{slug}.md"'
+        views = sorted(self._entities_dir().glob("*-entities.md"))
+        refs = [v.name for v in views if needle in v.read_text(encoding="utf-8")]
+
+        print(f"  Archived: {slug}.md -> archive/{slug}.md")
+        print(f"  Reason: {reason}")
+        if refs:
+            print(f"  Referenced by: {', '.join(refs)} (update these views)")
+        print(f"  Canonical set: {len(self._list_existing_entity_names())} entities")
+
    def _split_entities(
        self, combined_content: str
    ) -> list[tuple[str, Path]]:
@@ -792,6 +834,11 @@ class ChapterProcessor:
        total_entities = len(self._list_existing_entity_names())
        if total_entities:
            print(f"\n  Canonical entity set: {total_entities} unique entities")
+        archive = self._archive_dir()
+        if archive.exists():
+            archived = len(list(archive.glob("*.md")))
+            if archived:
+                print(f"  Archived entities: {archived}")

    # ── Statistics ───────────────────────────────────────────────────

@@ -820,12 +867,16 @@ def main():
    group.add_argument("--metrics", action="store_true", help="Assess metrics only")
    group.add_argument("--list", action="store_true", help="List available chapters")
    group.add_argument("--stats", action="store_true", help="Show dependency statistics")
+    group.add_argument("--archive-entity", type=str, metavar="SLUG",
+                       help="Archive an entity (move to archive/ with reason)")

+    parser.add_argument("--reason", type=str, default=None,
+                        help="Reason for archiving (used with --archive-entity)")
    parser.add_argument("--no-commit", action="store_true", help="Skip git commits")
    parser.add_argument(
        "--provider",
        type=str,
-        choices=["openrouter", "claude-code", "gemini"],
+        choices=["openrouter", "claude-code", "gemini", "openai"],
        default=None,
        help="LLM provider for auto-generating outputs (omit for manual mode)",
    )
@@ -834,17 +885,25 @@ def main():
    args = parser.parse_args()

    # Build optional LLM adapter
+    _PROVIDER_DEFAULTS = {
+        "openrouter": "arcee-ai/trinity-large-preview:free",
+    }
    llm_adapter = None
    if args.provider:
        from markitect.llm import create_adapter
-        llm_adapter = create_adapter(args.provider, model=args.model)
-        print(f"LLM: {args.provider}" + (f" ({args.model})" if args.model else ""))
+        model = args.model or _PROVIDER_DEFAULTS.get(args.provider)
+        llm_adapter = create_adapter(args.provider, model=model)
+        print(f"LLM: {args.provider} ({model or 'default'})")

    example_dir = Path(__file__).parent
    processor = ChapterProcessor(example_dir, llm_adapter=llm_adapter)
    processor.setup()

-    if args.list:
+    if args.archive_entity:
+        if not args.reason:
+            parser.error("--archive-entity requires --reason")
+        processor.archive_entity(args.archive_entity, args.reason)
+    elif args.list:
        processor.list_chapters()
    elif args.stats:
        processor.show_stats()