feat(llm): add Gemini adapter and process book-1-chapter-05

Add GeminiAdapter calling Google's Generative Language REST API (default model: gemini-2.5-flash). Register "gemini" as the third provider in the factory and CLI. Add rate-limit retry with linearly increasing backoff (15s, 30s, 45s) to the pipeline's _call_llm helper. Increase default max_tokens from 2000 to 4096. Process book-1-chapter-05 via Gemini free tier — 1 new entity extracted (necessaries-conveniencies-and-amusements-of-life), 41 existing entities correctly skipped by dedup. Canonical set now at 42 unique entities. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 22:54:37 +01:00
parent 2d1282a61e
commit 880c1d1374
22 changed files with 12008 additions and 57 deletions
--- a/examples/infospace-with-history/process_chapters.py
+++ b/examples/infospace-with-history/process_chapters.py
@@ -228,21 +228,34 @@ class ChapterProcessor:

    # ── LLM Execution Helpers ─────────────────────────────────────────

-    def _call_llm(self, prompt: str, stage_label: str) -> Optional[str]:
+    def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
        """Call the LLM and return the content string, or ``None`` on failure.

+        Retries up to 3 times on rate-limit (429) errors with linearly increasing backoff (15s, 30s, 45s).
        Does **not** write any files — callers decide where to persist.
        """
        import time as _time
        from markitect.prompts.execution.models import RunConfig
+        from markitect.llm.exceptions import LLMRateLimitError

        print(f"        Calling LLM ({stage_label})...")
        t0 = _time.time()
-        try:
-            response = self.llm_adapter.execute_prompt(prompt, RunConfig())
-        except Exception as exc:
-            print(f"        LLM error ({_time.time() - t0:.1f}s): {exc}")
-            return None
+        max_retries = 3
+        for attempt in range(max_retries + 1):
+            try:
+                response = self.llm_adapter.execute_prompt(prompt, RunConfig(max_tokens=max_tokens))
+                break  # success
+            except LLMRateLimitError as exc:
+                if attempt < max_retries:
+                    wait = 15 * (attempt + 1)  # 15, 30, 45 seconds
+                    print(f"        Rate limited, retrying in {wait}s (attempt {attempt + 1}/{max_retries})...")
+                    _time.sleep(wait)
+                else:
+                    print(f"        LLM rate limit after {max_retries} retries ({_time.time() - t0:.1f}s): {exc}")
+                    return None
+            except Exception as exc:
+                print(f"        LLM error ({_time.time() - t0:.1f}s): {exc}")
+                return None

        elapsed = _time.time() - t0
        usage = response.usage
@@ -260,9 +273,9 @@ class ChapterProcessor:

        return content

-    def _execute_llm(self, prompt: str, output_file: Path, stage_label: str) -> Optional[str]:
+    def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
        """Call the LLM, write the result to *output_file*, and return it."""
-        content = self._call_llm(prompt, stage_label)
+        content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
        if content:
            output_file.parent.mkdir(parents=True, exist_ok=True)
            output_file.write_text(content)
@@ -812,7 +825,7 @@ def main():
    parser.add_argument(
        "--provider",
        type=str,
-        choices=["openrouter", "claude-code"],
+        choices=["openrouter", "claude-code", "gemini"],
        default=None,
        help="LLM provider for auto-generating outputs (omit for manual mode)",
    )