feat(llm): add Gemini adapter and process book-1-chapter-05
Add GeminiAdapter calling Google's Generative Language REST API (default model: gemini-2.5-flash). Register "gemini" as third provider in the factory and CLI. Add rate-limit retry with linearly increasing backoff (15s, 30s, 45s; up to 3 retries) to the pipeline's _call_llm helper. Increase default max_tokens from 2000 to 4096. Process book-1-chapter-05 via Gemini free tier — 1 new entity extracted (necessaries-conveniencies-and-amusements-of-life), 41 existing entities correctly skipped by dedup. Canonical set now at 42 unique entities. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -228,21 +228,34 @@ class ChapterProcessor:
|
||||
|
||||
# ── LLM Execution Helpers ─────────────────────────────────────────
|
||||
|
||||
def _call_llm(self, prompt: str, stage_label: str) -> Optional[str]:
|
||||
def _call_llm(self, prompt: str, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
|
||||
"""Call the LLM and return the content string, or ``None`` on failure.
|
||||
|
||||
Retries up to 3 times on rate-limit (429) errors with linearly increasing backoff (15s, 30s, 45s).
|
||||
Does **not** write any files — callers decide where to persist.
|
||||
"""
|
||||
import time as _time
|
||||
from markitect.prompts.execution.models import RunConfig
|
||||
from markitect.llm.exceptions import LLMRateLimitError
|
||||
|
||||
print(f" Calling LLM ({stage_label})...")
|
||||
t0 = _time.time()
|
||||
try:
|
||||
response = self.llm_adapter.execute_prompt(prompt, RunConfig())
|
||||
except Exception as exc:
|
||||
print(f" LLM error ({_time.time() - t0:.1f}s): {exc}")
|
||||
return None
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
response = self.llm_adapter.execute_prompt(prompt, RunConfig(max_tokens=max_tokens))
|
||||
break # success
|
||||
except LLMRateLimitError as exc:
|
||||
if attempt < max_retries:
|
||||
wait = 15 * (attempt + 1) # 15, 30, 45 seconds
|
||||
print(f" Rate limited, retrying in {wait}s (attempt {attempt + 1}/{max_retries})...")
|
||||
_time.sleep(wait)
|
||||
else:
|
||||
print(f" LLM rate limit after {max_retries} retries ({_time.time() - t0:.1f}s): {exc}")
|
||||
return None
|
||||
except Exception as exc:
|
||||
print(f" LLM error ({_time.time() - t0:.1f}s): {exc}")
|
||||
return None
|
||||
|
||||
elapsed = _time.time() - t0
|
||||
usage = response.usage
|
||||
@@ -260,9 +273,9 @@ class ChapterProcessor:
|
||||
|
||||
return content
|
||||
|
||||
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str) -> Optional[str]:
|
||||
def _execute_llm(self, prompt: str, output_file: Path, stage_label: str, max_tokens: int = 4096) -> Optional[str]:
|
||||
"""Call the LLM, write the result to *output_file*, and return it."""
|
||||
content = self._call_llm(prompt, stage_label)
|
||||
content = self._call_llm(prompt, stage_label, max_tokens=max_tokens)
|
||||
if content:
|
||||
output_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_file.write_text(content)
|
||||
@@ -812,7 +825,7 @@ def main():
|
||||
parser.add_argument(
|
||||
"--provider",
|
||||
type=str,
|
||||
choices=["openrouter", "claude-code"],
|
||||
choices=["openrouter", "claude-code", "gemini"],
|
||||
default=None,
|
||||
help="LLM provider for auto-generating outputs (omit for manual mode)",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user