fix(lifecycle): _relative_to_root path doubling with relative workspace

fix(evaluation_io): tolerate code-fenced frontmatter and varied score shapes from small LLMs. Two bugs surfaced running the first live Lefevre chapter-I smoke against openai/gpt-4o-mini. 1. _relative_to_root doubled artifact paths when --workspace was a relative path (e.g. "."). The function received an already-CWD-relative path like infospaces/foo/artifacts/sources/x.md and re-prepended root, producing infospaces/foo/infospaces/foo/... stored in artifacts/index.yaml — which then failed file reads on the subsequent workflow stage. Fix: when raw is relative, try CWD-relative resolution first (matches root / sub call shapes); fall back to root-prefixing only when the CWD interpretation does not land under root (matches bare relative-subpath call shapes from rendered template outputs). 2. _read_frontmatter_markdown only accepted a literal ---/--- delimited block at the start of the file. gpt-4o-mini emitted four other shapes across the seven evaluation files this chapter produced: - ```yaml ... ``` fence (no --- delimiters) - ```markdown ... ``` outer fence wrapping --- frontmatter - scores as mapping ({groundedness: 4, ...}) instead of the canonical list of {name, value} dicts - scores as list of single-key dicts ([{groundedness: 4}, ...]) Fix: _extract_frontmatter_block tolerates ```yaml fences and strips ```markdown outer fences; _normalise_scores rewrites mapping- and single-key-dict shapes into the canonical form so ScoreEntry.from_dict keeps working. Both fixes are pure-Python; no API changes. 179 tests pass, 2 skipped. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 03:26:55 +02:00
parent 08ecefe309
commit 9404831069
2 changed files with 126 additions and 25 deletions
--- a/src/infospace_bench/evaluation_io.py
+++ b/src/infospace_bench/evaluation_io.py
@@ -136,21 +136,7 @@ def read_history(history_path: str | Path) -> list[EvaluationSnapshot]:
 def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
    text = path.read_text(encoding="utf-8")
-    if not text.startswith(f"{FRONTMATTER_MARKER}\n"):
+    raw, body = _extract_frontmatter_block(text, path)
        raise InfospaceError(
            "invalid_evaluation_file",
            f"Missing YAML frontmatter in evaluation file: {path}",
            {"path": str(path)},
        )
    end = text.find(f"\n{FRONTMATTER_MARKER}\n", len(FRONTMATTER_MARKER) + 1)
    if end == -1:
        raise InfospaceError(
            "invalid_evaluation_file",
            f"Unclosed YAML frontmatter in evaluation file: {path}",
            {"path": str(path)},
        )
    raw = text[len(FRONTMATTER_MARKER) + 1 : end]
    body = text[end + len(FRONTMATTER_MARKER) + 2 :]
    data = yaml.safe_load(raw)
    if not isinstance(data, dict):
        raise InfospaceError(
@@ -158,9 +144,105 @@ def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
            f"Expected mapping frontmatter in evaluation file: {path}",
            {"path": str(path)},
        )
    _normalise_scores(data)
    return data, body
 def _normalise_scores(data: dict[str, Any]) -> None:
    """Normalise score shapes emitted by various LLMs into the canonical
    list-of-{name, value} form the rest of the pipeline expects.
    Handles three variants beyond the canonical:
    - mapping form: ``scores: {groundedness: 5, lesson_clarity: 4}``
    - list of single-key dicts: ``[{groundedness: 4}, {lesson_clarity: 3}]``
    - list of canonical dicts (left as-is)
    """
    scores = data.get("scores")
    if isinstance(scores, dict):
        data["scores"] = [
            {"name": str(name), "value": _coerce_score(value)}
            for name, value in scores.items()
        ]
    elif isinstance(scores, list):
        normalised: list[dict[str, Any]] = []
        for item in scores:
            if not isinstance(item, dict):
                continue
            if "name" in item and "value" in item:
                normalised.append(item)
            elif len(item) == 1:
                (name, value), = item.items()
                normalised.append({"name": str(name), "value": _coerce_score(value)})
            else:
                normalised.append(item)
        data["scores"] = normalised
 def _coerce_score(value: Any) -> float:
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0
 def _extract_frontmatter_block(text: str, path: Path) -> tuple[str, str]:
    """Split an evaluation file into its raw YAML frontmatter and its body.

    Tolerates several shapes commonly produced by LLMs:
    - the canonical ``---``-delimited block at the start of the file
    - a fenced yaml / yml code block at the start of the file
    - an outer markdown / md code fence wrapping ``---`` frontmatter

    Leading newlines are ignored in every case.

    Args:
        text: Full text of the evaluation file.
        path: Location of the file; used only in error messages and details.

    Returns:
        ``(raw, body)``: the unparsed YAML text and whatever follows the
        frontmatter block.

    Raises:
        InfospaceError: ``invalid_evaluation_file`` when no frontmatter block
            is found or the block is never closed.
    """
    stripped_text = text.lstrip("\n")
    # Strip an outer ```markdown / ```md fence if present and recurse on its
    # body so any ``---`` frontmatter inside still gets recognised.
    for outer_marker in ("```markdown\n", "```md\n"):
        if stripped_text.startswith(outer_marker):
            inner_start = len(outer_marker)
            closing_idx = stripped_text.rfind("```")
            if closing_idx <= inner_start:
                # No closing fence (or nothing inside it): fall through to the
                # remaining checks, which end in the "missing" error.
                break
            inner = stripped_text[inner_start:closing_idx].rstrip()
            return _extract_frontmatter_block(inner, path)
    opening = f"{FRONTMATTER_MARKER}\n"
    if stripped_text.startswith(opening):
        closing = f"\n{FRONTMATTER_MARKER}"
        end = stripped_text.find(f"{closing}\n", len(opening))
        if end == -1:
            # Also accept a closing marker at EOF without a trailing newline.
            # It must sit on its own line: requiring the preceding newline
            # keeps a final line such as ``note: foo---`` (or the opening
            # marker of a file that contains nothing else) from being
            # mistaken for the closing delimiter.
            trimmed = stripped_text.rstrip()
            if not trimmed.endswith(closing):
                raise InfospaceError(
                    "invalid_evaluation_file",
                    f"Unclosed YAML frontmatter in evaluation file: {path}",
                    {"path": str(path)},
                )
            end = len(trimmed) - len(closing)
        raw = stripped_text[len(opening) : end]
        # Skip the closing marker plus the newline that follows it.
        body = stripped_text[end + len(closing) + 1 :]
        return raw, body
    if stripped_text.startswith(("```yaml", "```yml")):
        # Content starts on the line after the opening fence line.
        content_start = stripped_text.find("\n") + 1
        fence_end = stripped_text.find("\n```", content_start)
        if fence_end == -1:
            raise InfospaceError(
                "invalid_evaluation_file",
                f"Unclosed YAML code fence in evaluation file: {path}",
                {"path": str(path)},
            )
        raw = stripped_text[content_start:fence_end]
        body = stripped_text[fence_end + len("\n```") :]
        return raw, body.lstrip("\n")
    raise InfospaceError(
        "invalid_evaluation_file",
        f"Missing YAML frontmatter in evaluation file: {path}",
        {"path": str(path)},
    )
 def _parse_rationales(body: str) -> dict[str, str]:
    rationales: dict[str, str] = {}
    current_name: str | None = None
--- a/src/infospace_bench/lifecycle.py
+++ b/src/infospace_bench/lifecycle.py
@@ -219,18 +219,37 @@ def _read_yaml(path: Path) -> dict[str, Any]:
 def _relative_to_root(root: Path, path: Path | str) -> str:
    """Return ``path`` relative to ``root``, accepting either call shape.
    Callers pass either a fully-resolved ``root / sub`` style path or a
    bare ``sub`` path that should be interpreted relative to ``root``.
    With a relative ``root`` the old single-interpretation logic produced
    a doubled path (e.g. ``infospaces/foo/infospaces/foo/...``) because it
    re-prepended ``root`` to a path that was already under ``root`` when
    resolved from CWD. The fix tries the CWD interpretation first and only
    falls back to root-prefixing when the CWD interpretation doesn't land
    under ``root``.
    """
    raw = Path(path)
    target = raw if raw.is_absolute() else root / raw
    root_resolved = root.resolve()
-    target_resolved = target.resolve()
+    if raw.is_absolute():
-    try:
+        candidates = [raw.resolve()]
-        return str(target_resolved.relative_to(root_resolved))
+    else:
-    except ValueError as exc:
+        cwd_candidate = raw.resolve()
-        raise InfospaceError(
+        joined_candidate = (root / raw).resolve()
-            "artifact_path_escapes_infospace",
+        candidates = [cwd_candidate]
-            f"Artifact path escapes infospace root: {path}",
+        if joined_candidate != cwd_candidate:
-            {"root": str(root), "path": str(path)},
+            candidates.append(joined_candidate)
-        ) from exc
+    for candidate in candidates:
        try:
            return str(candidate.relative_to(root_resolved))
        except ValueError:
            continue
    raise InfospaceError(
        "artifact_path_escapes_infospace",
        f"Artifact path escapes infospace root: {path}",
        {"root": str(root), "path": str(path)},
    )
 def _write_yaml(path: Path, data: dict[str, Any]) -> None: