# markitect-main/markitect/llm/gemini.py

"""
Google Gemini adapter — calls the Generative Language REST API directly.
"""

import time
from typing import Optional, Dict, Any

from markitect.llm.adapter import LLMAdapter
from markitect.llm.models import RunConfig, LLMResponse
from markitect.llm.config import resolve_api_key, find_project_root
from markitect.llm._http import post_json
from markitect.llm.exceptions import (
    LLMConfigurationError,
    LLMAPIError,
    LLMRateLimitError,
)

# Model name used when the adapter is constructed without an explicit ``model``.
_DEFAULT_MODEL = "gemini-2.5-flash"
# Base URL that the ``models/<model>:generateContent`` request path is appended to.
_API_BASE = "https://generativelanguage.googleapis.com/v1beta"


class GeminiAdapter(LLMAdapter):
    """LLM adapter that calls the Google Generative Language API.

    Supports the free tier of Gemini models via a Google AI Studio API key.

    The API key is obtained through ``resolve_api_key`` from the explicit
    constructor argument, the ``GEMINI_API_KEY`` environment variable, or an
    ``apikey-geminifree.txt`` file in the project root (when a project root
    can be found).
    """

    # HTTP status codes treated as transient and therefore retried.
    _RETRYABLE_STATUS_CODES = (502, 503, 504)

    def __init__(
        self,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        system_prompt: Optional[str] = None,
        max_retries: int = 3,
        **_kwargs: Any,
    ):
        """Create the adapter and resolve its API key.

        Args:
            model: Gemini model name; falls back to ``_DEFAULT_MODEL`` when
                omitted or empty.
            api_key: Explicit API key, passed to ``resolve_api_key``.
            system_prompt: Optional instruction text sent ahead of every
                prompt.
            max_retries: Number of *additional* attempts after the first
                request fails with a retryable error. Negative values are
                treated as 0 so that one request is always made.
            **_kwargs: Accepted and ignored.

        Raises:
            LLMConfigurationError: If no API key could be resolved.
        """
        self._model = model or _DEFAULT_MODEL
        self._system_prompt = system_prompt
        # A negative budget used to produce an empty retry loop that ended in
        # ``raise None`` (TypeError) without ever contacting the API.
        self._max_retries = max(0, max_retries)

        root = find_project_root()
        key_file_paths = [root / "apikey-geminifree.txt"] if root else []
        self._api_key = resolve_api_key(
            explicit=api_key,
            env_var="GEMINI_API_KEY",
            key_file_paths=key_file_paths,
        )
        if not self._api_key:
            raise LLMConfigurationError(
                "No Gemini API key found. Set GEMINI_API_KEY or create "
                "apikey-geminifree.txt in the project root.",
                context={"provider": "gemini"},
            )

    # ── LLMAdapter interface ────────────────────────────────────────

    def execute_prompt(self, prompt: str, config: RunConfig) -> LLMResponse:
        """Send *prompt* to the model and return the parsed response.

        Args:
            prompt: User prompt text.
            config: Supplies ``temperature``, ``max_tokens`` and
                ``timeout_seconds`` for the request.

        Returns:
            An ``LLMResponse`` whose ``content`` is the concatenated text of
            the first candidate's parts. When the API returns no candidates,
            ``content`` is empty, ``finish_reason`` is ``"error"`` and, if the
            response carries ``promptFeedback.blockReason``, that value is
            exposed as ``metadata["block_reason"]``.

        Raises:
            LLMRateLimitError: If the request is still rate limited after all
                retries.
            LLMAPIError: For non-retryable API errors, or retryable ones that
                persist after all retries.
        """
        model = self._model

        # Build Gemini request
        contents: list[Dict[str, Any]] = []
        if self._system_prompt:
            # The system prompt is delivered as a primed user/model exchange.
            # NOTE(review): the API also documents a dedicated
            # ``systemInstruction`` field — consider switching; confirm the
            # effect on model behaviour first.
            contents.append({
                "role": "user",
                "parts": [{"text": self._system_prompt}],
            })
            contents.append({
                "role": "model",
                "parts": [{"text": "Understood."}],
            })
        contents.append({
            "role": "user",
            "parts": [{"text": prompt}],
        })

        payload: Dict[str, Any] = {
            "contents": contents,
            "generationConfig": {
                "temperature": config.temperature,
                "maxOutputTokens": config.max_tokens,
            },
        }

        # NOTE(review): the key travels in the query string, so it shows up in
        # anything that logs or echoes this URL. Prefer header-based auth if
        # ``post_json`` supports custom headers — confirm.
        url = f"{_API_BASE}/models/{model}:generateContent?key={self._api_key}"

        # Monotonic clock: wall-clock adjustments must not distort (or make
        # negative) the measured latency. The figure includes retry back-off.
        start = time.monotonic()
        data = self._post_with_retries(url, payload, timeout=config.timeout_seconds)
        latency = time.monotonic() - start

        metadata: Dict[str, Any] = {
            "provider": "gemini",
            "latency_seconds": round(latency, 3),
        }

        # Parse Gemini response. ``or`` fallbacks guard against keys that are
        # present but JSON ``null``, which ``.get(key, default)`` lets through.
        candidates = data.get("candidates") or []
        if candidates:
            first = candidates[0]
            parts = (first.get("content") or {}).get("parts") or []
            content = "".join(part.get("text") or "" for part in parts)
            finish_reason = (first.get("finishReason") or "STOP").lower()
        else:
            content = ""
            finish_reason = "error"
            # Surface why the prompt produced no candidates, when the API says.
            block_reason = (data.get("promptFeedback") or {}).get("blockReason")
            if block_reason:
                metadata["block_reason"] = block_reason

        usage_meta = data.get("usageMetadata") or {}

        return LLMResponse(
            content=content,
            model=model,
            usage={
                "prompt_tokens": usage_meta.get("promptTokenCount", 0),
                "completion_tokens": usage_meta.get("candidatesTokenCount", 0),
                "total_tokens": usage_meta.get("totalTokenCount", 0),
            },
            finish_reason=finish_reason,
            metadata=metadata,
        )

    def validate_config(self, config: RunConfig) -> bool:
        """Return True when an API key is set and temperature is in [0.0, 2.0]."""
        if not self._api_key:
            return False
        if not (0.0 <= config.temperature <= 2.0):
            return False
        return True

    # ── Internals ───────────────────────────────────────────────────

    def _post_with_retries(
        self,
        url: str,
        payload: Dict[str, Any],
        timeout: int,
    ) -> Dict[str, Any]:
        """POST *payload* to *url*, retrying transient failures.

        Rate-limit errors and API errors with a status in
        ``_RETRYABLE_STATUS_CODES`` are retried up to ``self._max_retries``
        times, sleeping ``2 ** attempt`` seconds (1, 2, 4, ...) between
        attempts. At least one request is always made.

        Returns:
            The decoded JSON response from ``post_json``.

        Raises:
            LLMRateLimitError: If still rate limited on the final attempt.
            LLMAPIError: Immediately for non-retryable statuses, or on the
                final attempt for retryable ones.
        """
        # Clamp here as well so an instance whose attribute was set directly
        # still performs one attempt instead of falling through.
        retry_budget = max(0, self._max_retries)
        attempt = 0
        while True:
            try:
                return post_json(url, payload, timeout=timeout)
            except LLMRateLimitError:
                # Listed first so it is handled as a rate limit even if it
                # happens to derive from LLMAPIError.
                if attempt >= retry_budget:
                    raise
            except LLMAPIError as exc:
                retryable = exc.status_code in self._RETRYABLE_STATUS_CODES
                if not retryable or attempt >= retry_budget:
                    raise
            # Only reached after a retryable failure with budget remaining.
            time.sleep(2 ** attempt)
            attempt += 1