feat(llm): add Gemini adapter and process book-1-chapter-05

Add GeminiAdapter calling Google's Generative Language REST API
(default model: gemini-2.5-flash). Register "gemini" as a third
provider in the factory and CLI. Add rate-limit retry (up to 3
retries with linearly increasing backoff: 15s, 30s, 45s) to the
pipeline's _call_llm helper. Increase default max_tokens from
2000 to 4096.

Process book-1-chapter-05 via Gemini free tier — 1 new entity
extracted (necessaries-conveniencies-and-amusements-of-life),
41 existing entities correctly skipped by dedup. Canonical set
now at 42 unique entities.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-11 22:54:37 +01:00
parent 2d1282a61e
commit 880c1d1374
22 changed files with 12008 additions and 57 deletions

View File

@@ -228,21 +228,34 @@ class ChapterProcessor:
# ── LLM Execution Helpers ─────────────────────────────────────────
def _call_llm(self, prompt: str, stage_label: str) -> Optional[str]:
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
"""Call the LLM and return the content string, or ``None`` on failure.
Retries up to 3 times on rate-limit (429) errors with linearly increasing backoff (15s, 30s, 45s).
Does **not** write any files — callers decide where to persist.
"""
import time as _time
from markitect.prompts.execution.models import RunConfig
from markitect.llm.exceptions import LLMRateLimitError
print(f" Calling LLM ({stage_label})...")
t0 = _time.time()
try:
response = self.llm_adapter.execute_prompt(prompt, RunConfig())
except Exception as exc:
print(f" LLM error ({_time.time() - t0:.1f}s): {exc}")
return None
max_retries = 3
for attempt in range(max_retries + 1):
try:
response = self.llm_adapter.execute_prompt(prompt, RunConfig(max_tokens=max_tokens))
break # success
except LLMRateLimitError as exc:
if attempt < max_retries:
wait = 15 * (attempt + 1) # 15, 30, 45 seconds
print(f" Rate limited, retrying in {wait}s (attempt {attempt + 1}/{max_retries})...")
_time.sleep(wait)
else:
print(f" LLM rate limit after {max_retries} retries ({_time.time() - t0:.1f}s): {exc}")
return None
except Exception as exc:
print(f" LLM error ({_time.time() - t0:.1f}s): {exc}")
return None
elapsed = _time.time() - t0
usage = response.usage
@@ -260,9 +273,9 @@ class ChapterProcessor:
return content
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str) -> Optional[str]:
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
"""Call the LLM, write the result to *output_file*, and return it."""
content = self._call_llm(prompt, stage_label)
content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
if content:
output_file.parent.mkdir(parents=True, exist_ok=True)
output_file.write_text(content)
@@ -812,7 +825,7 @@ def main():
parser.add_argument(
"--provider",
type=str,
choices=["openrouter", "claude-code"],
choices=["openrouter", "claude-code", "gemini"],
default=None,
help="LLM provider for auto-generating outputs (omit for manual mode)",
)