diff --git a/markitect/infospace/cli.py b/markitect/infospace/cli.py
index 98ba94da..0a8073f5 100644
--- a/markitect/infospace/cli.py
+++ b/markitect/infospace/cli.py
@@ -237,7 +237,9 @@ def _entities_by_type(cfg, root: "Path", entity_list: list) -> None:
 @click.option("--model", default=None, help="LLM model name.")
 @click.option("--entity", "entity_slug", default=None, help="Evaluate a single entity by slug.")
 @click.option("--chapter", default=None, help="Evaluate entities from a specific chapter.")
-def evaluate(config_path, provider, model, entity_slug, chapter):
+@click.option("--force", is_flag=True, default=False,
+              help="Re-evaluate entities whose evaluation file already exists.")
+def evaluate(config_path, provider, model, entity_slug, chapter, force):
     """Evaluate entities using LLM-based quality assessment."""
     cfg, cfg_path = _load_config_or_exit(config_path)
     root = cfg_path.parent
@@ -252,32 +254,44 @@ def evaluate(config_path, provider, model, entity_slug, chapter):
         click.echo("No entities to evaluate.")
         return
 
-    # Filter
+    # Filter. Accept hyphenated input for --entity by normalizing to the
+    # underscore slug format produced by parse_entity_directory.
     if entity_slug:
-        entity_list = [e for e in entity_list if e.slug == entity_slug]
-        if not entity_list:
-            click.echo(f"Error: Entity '{entity_slug}' not found.", err=True)
+        normalized = entity_slug.replace("-", "_")
+        matches = [e for e in entity_list if e.slug == normalized]
+        if not matches:
+            # Build a short "did you mean…" list from entities sharing a stem.
+            stem = normalized.split("_", 1)[0]
+            near = sorted(e.slug for e in entity_list if e.slug.startswith(stem))[:5]
+            msg = f"Error: Entity '{entity_slug}' not found."
+            if near:
+                msg += f" Did you mean: {', '.join(near)} ?"
+            click.echo(msg, err=True)
             raise SystemExit(1)
+        entity_list = matches
     elif chapter:
         entity_list = [e for e in entity_list if chapter in e.source_chapter]
         if not entity_list:
             click.echo(f"No entities found for chapter '{chapter}'.")
             return
 
-    # Skip entities that already have evaluation files (incremental resume)
+    # Skip entities that already have evaluation files (incremental resume).
+    # Applies uniformly to full-pass, --entity, and --chapter runs unless
+    # --force is set.
     from markitect.infospace.evaluate import run_entity_evaluation
     output_dir = root / cfg.evaluations_dir
-    if not entity_slug and not chapter and output_dir.is_dir():
-        previous_digests = {
-            p.stem: ""  # non-empty sentinel → triggers skip in BatchEvaluator
-            for p in output_dir.glob("*.md")
-        }
-        entity_list = [e for e in entity_list if e.slug not in previous_digests]
+    if not force and output_dir.is_dir():
+        existing = {p.stem for p in output_dir.glob("*.md")}
+        before = len(entity_list)
+        entity_list = [e for e in entity_list if e.slug not in existing]
+        skipped = before - len(entity_list)
         if not entity_list:
-            click.echo("All entities already evaluated. Nothing to do.")
+            click.echo("All selected entities already evaluated. "
+                       "Re-run with --force to overwrite.")
             return
-        if previous_digests:
-            click.echo(f"Skipping {len(previous_digests)} already-evaluated entities.")
+        if skipped:
+            click.echo(f"Skipping {skipped} already-evaluated entities. "
+                       "Use --force to re-evaluate.")
 
     # Create adapter
     from markitect.llm import create_adapter
@@ -285,10 +299,14 @@ def evaluate(config_path, provider, model, entity_slug, chapter):
     adapter = create_adapter(provider, model=model)
     run_config = RunConfig(model_name=model, temperature=0.3, max_tokens=2000)
 
-    # Progress callback
+    # Progress callback — surface error detail so agents don't have to
+    # drop into Python to see whether an ERROR was 429, 503, or auth.
     def on_progress(done, total, result):
         status = result.status.upper()
-        click.echo(f"  [{done}/{total}] {result.key}: {status}")
+        if status == "ERROR" and result.error:
+            click.echo(f"  [{done}/{total}] {result.key}: ERROR — {result.error}")
+        else:
+            click.echo(f"  [{done}/{total}] {result.key}: {status}")
 
     click.echo(f"Evaluating {len(entity_list)} entities via {provider}...")
 
diff --git a/markitect/infospace/evaluate.py b/markitect/infospace/evaluate.py
index b755f49d..a1886e4d 100644
--- a/markitect/infospace/evaluate.py
+++ b/markitect/infospace/evaluate.py
@@ -195,12 +195,23 @@ def run_entity_evaluation(
     """
     topic = config.topic.name
     evaluations_path = output_dir or Path(config.evaluations_dir)
-    evaluator_name = (run_config.model_name if run_config else "unknown")
+    # Fall back from run_config.model_name (may be None if the CLI user did
+    # not pass --model) to the adapter's resolved model, and only then to
+    # "unknown". Keeps the evaluator field in the written frontmatter
+    # informative for later audits.
+    default_evaluator = (
+        (run_config.model_name if run_config else None)
+        or getattr(adapter, "_model", None)
+        or "unknown"
+    )
 
     def _write_and_notify(done: int, total: int, result) -> None:
         # Write file immediately on success (incremental — run is resumable)
         if result.status == "success" and result.response is not None:
             scores = parse_evaluation_response(result.response.content, dimensions)
+            # Prefer the model name the adapter actually echoed back — it
+            # reflects post-resolution fallbacks (e.g. flash → flash-lite).
+            evaluator_name = result.response.model or default_evaluator
             evaluation = EntityEvaluation(
                 entity_slug=result.key,
                 evaluator=evaluator_name,
diff --git a/markitect/infospace/history.py b/markitect/infospace/history.py
index dca508d4..3f6f6790 100644
--- a/markitect/infospace/history.py
+++ b/markitect/infospace/history.py
@@ -81,17 +81,26 @@ def snapshot_from_checks(
 # ── Metrics file I/O ────────────────────────────────────────────────
 
 
-def write_metrics_file(metrics: Dict[str, float], path: Path) -> None:
+def write_metrics_file(metrics: Dict[str, Any], path: Path) -> None:
     """Write the latest metrics to a simple YAML file.
 
     This file is used by ``markitect infospace viability`` for quick
-    threshold checking.
+    threshold checking. Non-numeric values (e.g. ``type_distribution``)
+    are passed through unchanged; floats are rounded to 6 dp; ints are
+    preserved as ints so external consumers don't see ``29`` silently
+    become ``29.0`` on every round-trip.
     """
+    def _normalize(v: Any) -> Any:
+        if isinstance(v, bool):
+            return v
+        if isinstance(v, float):
+            return round(v, 6)
+        return v
+
     path.parent.mkdir(parents=True, exist_ok=True)
     path.write_text(
         yaml.safe_dump(
-            {k: round(v, 6) if isinstance(v, float) else v
-             for k, v in sorted(metrics.items())},
+            {k: _normalize(v) for k, v in sorted(metrics.items())},
             default_flow_style=False,
             sort_keys=True,
         ),
@@ -99,14 +108,20 @@ def write_metrics_file(metrics: Dict[str, float], path: Path) -> None:
     )
 
 
-def read_metrics_file(path: Path) -> Dict[str, float]:
-    """Read the latest metrics from a YAML file."""
+def read_metrics_file(path: Path) -> Dict[str, Any]:
+    """Read the latest metrics from a YAML file.
+
+    Returns all keys as written on disk, preserving types verbatim so a
+    round-trip via :func:`write_metrics_file` does not silently drop
+    structured values (e.g. ``type_distribution``) or flatten ints to
+    floats.
+    """
     if not path.is_file():
         return {}
     raw = yaml.safe_load(path.read_text(encoding="utf-8"))
     if not isinstance(raw, dict):
         return {}
-    return {k: float(v) for k, v in raw.items() if isinstance(v, (int, float))}
+    return raw
 
 
 # ── History operations ───────────────────────────────────────────────
diff --git a/markitect/llm/gemini.py b/markitect/llm/gemini.py
index f738828e..2a0268b9 100644
--- a/markitect/llm/gemini.py
+++ b/markitect/llm/gemini.py
@@ -9,7 +9,11 @@ from markitect.llm.adapter import LLMAdapter
 from markitect.llm.models import RunConfig, LLMResponse
 from markitect.llm.config import resolve_api_key, find_project_root
 from markitect.llm._http import post_json
-from markitect.llm.exceptions import LLMConfigurationError
+from markitect.llm.exceptions import (
+    LLMConfigurationError,
+    LLMAPIError,
+    LLMRateLimitError,
+)
 
 _DEFAULT_MODEL = "gemini-2.5-flash"
 _API_BASE = "https://generativelanguage.googleapis.com/v1beta"
@@ -26,10 +30,12 @@ class GeminiAdapter(LLMAdapter):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         system_prompt: Optional[str] = None,
+        max_retries: int = 3,
         **_kwargs: Any,
     ):
         self._model = model or _DEFAULT_MODEL
         self._system_prompt = system_prompt
+        self._max_retries = max_retries
 
         root = find_project_root()
         key_file_paths = [root / "apikey-geminifree.txt"] if root else []
@@ -77,7 +83,7 @@ class GeminiAdapter(LLMAdapter):
         url = f"{_API_BASE}/models/{model}:generateContent?key={self._api_key}"
 
         start = time.time()
-        data = post_json(url, payload, timeout=config.timeout_seconds)
+        data = self._post_with_retries(url, payload, timeout=config.timeout_seconds)
         latency = time.time() - start
 
         # Parse Gemini response
@@ -113,3 +119,27 @@ class GeminiAdapter(LLMAdapter):
         if not (0.0 <= config.temperature <= 2.0):
             return False
         return True
+
+    # ── Internals ───────────────────────────────────────────────────
+
+    def _post_with_retries(
+        self,
+        url: str,
+        payload: Dict[str, Any],
+        timeout: int,
+    ) -> Dict[str, Any]:
+        last_exc: Optional[Exception] = None
+        for attempt in range(self._max_retries + 1):
+            try:
+                return post_json(url, payload, timeout=timeout)
+            except LLMRateLimitError as exc:
+                last_exc = exc
+                if attempt < self._max_retries:
+                    time.sleep(2 ** attempt)
+            except LLMAPIError as exc:
+                if exc.status_code in (502, 503, 504) and attempt < self._max_retries:
+                    last_exc = exc
+                    time.sleep(2 ** attempt)
+                else:
+                    raise
+        raise last_exc  # type: ignore[misc]
diff --git a/tests/unit/infospace/test_history.py b/tests/unit/infospace/test_history.py
index 3fe466c3..8b46d7c1 100644
--- a/tests/unit/infospace/test_history.py
+++ b/tests/unit/infospace/test_history.py
@@ -124,6 +124,33 @@ class TestMetricsFileIO:
         path.write_text("just a string", encoding="utf-8")
         assert read_metrics_file(path) == {}
 
+    def test_round_trip_preserves_structured_values(self, tmp_path):
+        """Non-numeric values like type_distribution must survive a round-trip.
+
+        Regression: eval-summary --update-metrics used to drop any key
+        whose value wasn't a bare number, silently erasing type_distribution
+        from the file on every run.
+        """
+        path = tmp_path / "metrics.yaml"
+        metrics = {
+            "per_entity_mean": 3.9567,
+            "vsm_type_matrix_cells": 29,
+            "type_distribution": {
+                "Element": 315,
+                "Institution": 122,
+                "Principle": 102,
+            },
+        }
+        write_metrics_file(metrics, path)
+        loaded = read_metrics_file(path)
+        assert loaded["type_distribution"] == {
+            "Element": 315, "Institution": 122, "Principle": 102,
+        }
+        # And the int stayed an int on disk, not 29.0.
+        raw = path.read_text(encoding="utf-8")
+        assert "vsm_type_matrix_cells: 29\n" in raw
+        assert "vsm_type_matrix_cells: 29.0" not in raw
+
 
 # ── record_check_results ────────────────────────────────────────────
 
diff --git a/tests/unit/llm/test_gemini.py b/tests/unit/llm/test_gemini.py
new file mode 100644
index 00000000..b319b0ba
--- /dev/null
+++ b/tests/unit/llm/test_gemini.py
@@ -0,0 +1,82 @@
+"""Tests for markitect.llm.gemini — retry behavior + happy path."""
+
+from unittest import mock
+
+import pytest
+
+from markitect.llm.gemini import GeminiAdapter
+from markitect.llm.exceptions import LLMAPIError, LLMRateLimitError
+from markitect.prompts.execution.models import RunConfig, LLMResponse
+
+
+def _api_response(text="hello", model="gemini-2.5-flash"):
+    return {
+        "candidates": [
+            {
+                "content": {"parts": [{"text": text}], "role": "model"},
+                "finishReason": "STOP",
+            }
+        ],
+        "modelVersion": model,
+        "usageMetadata": {
+            "promptTokenCount": 3,
+            "candidatesTokenCount": 2,
+            "totalTokenCount": 5,
+        },
+    }
+
+
+class TestGeminiAdapter:
+    def _adapter(self, **kwargs):
+        defaults = {"api_key": "AIza-test"}
+        defaults.update(kwargs)
+        return GeminiAdapter(**defaults)
+
+    @mock.patch("markitect.llm.gemini.post_json")
+    def test_success(self, mock_post):
+        mock_post.return_value = _api_response("generated")
+        adapter = self._adapter()
+        resp = adapter.execute_prompt("hi", RunConfig())
+        assert isinstance(resp, LLMResponse)
+        assert resp.content == "generated"
+        assert resp.metadata["provider"] == "gemini"
+
+    @mock.patch("markitect.llm.gemini.post_json")
+    @mock.patch("markitect.llm.gemini.time.sleep")
+    def test_retry_on_429(self, mock_sleep, mock_post):
+        mock_post.side_effect = [
+            LLMRateLimitError("rate limited", status_code=429),
+            _api_response("recovered"),
+        ]
+        adapter = self._adapter(max_retries=2)
+        resp = adapter.execute_prompt("hi", RunConfig())
+        assert resp.content == "recovered"
+        assert mock_sleep.call_count == 1
+
+    @mock.patch("markitect.llm.gemini.post_json")
+    @mock.patch("markitect.llm.gemini.time.sleep")
+    def test_retry_on_503(self, mock_sleep, mock_post):
+        mock_post.side_effect = [
+            LLMAPIError("unavailable", status_code=503),
+            _api_response("back"),
+        ]
+        adapter = self._adapter(max_retries=2)
+        resp = adapter.execute_prompt("hi", RunConfig())
+        assert resp.content == "back"
+
+    @mock.patch("markitect.llm.gemini.post_json")
+    def test_no_retry_on_400(self, mock_post):
+        mock_post.side_effect = LLMAPIError("bad request", status_code=400)
+        adapter = self._adapter(max_retries=2)
+        with pytest.raises(LLMAPIError) as exc_info:
+            adapter.execute_prompt("hi", RunConfig())
+        assert exc_info.value.status_code == 400
+
+    @mock.patch("markitect.llm.gemini.post_json")
+    @mock.patch("markitect.llm.gemini.time.sleep")
+    def test_exhausted_retries_raises(self, mock_sleep, mock_post):
+        mock_post.side_effect = LLMRateLimitError("rate limited", status_code=429)
+        adapter = self._adapter(max_retries=1)
+        with pytest.raises(LLMRateLimitError):
+            adapter.execute_prompt("hi", RunConfig())
+        assert mock_sleep.call_count == 1  # 1 retry before giving up