fix(lifecycle): _relative_to_root path doubling with relative workspace

fix(evaluation_io): tolerate code-fenced frontmatter and varied score
shapes from small LLMs

Two bugs surfaced running the first live Lefevre chapter-I smoke
against openai/gpt-4o-mini.

1. _relative_to_root doubled artifact paths when --workspace was a
   relative path (e.g. "."). The function received an already-CWD-
   relative path like infospaces/foo/artifacts/sources/x.md and
   re-prepended root, producing infospaces/foo/infospaces/foo/...
   stored in artifacts/index.yaml — which then failed file reads on
   the subsequent workflow stage. Fix: when raw is relative, try
   CWD-relative resolution first (matches root / sub call shapes);
   fall back to root-prefixing only when the CWD interpretation does
   not land under root (matches bare relative-subpath call shapes
   from rendered template outputs).

2. _read_frontmatter_markdown only accepted a literal ---/---
   delimited block at the start of the file. gpt-4o-mini emitted four
   other shapes across the seven evaluation files this chapter
   produced:

     - ```yaml ... ``` fence (no --- delimiters)
     - ```markdown ... ``` outer fence wrapping --- frontmatter
     - scores as mapping ({groundedness: 4, ...}) instead of the
       canonical list of {name, value} dicts
     - scores as list of single-key dicts ([{groundedness: 4}, ...])

   Fix: _extract_frontmatter_block tolerates ```yaml fences and strips
   ```markdown outer fences; _normalise_scores rewrites mapping- and
   single-key-dict shapes into the canonical form so ScoreEntry.from_dict
   keeps working.

Both fixes are pure-Python; no API changes. 179 tests pass, 2 skipped.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 03:26:55 +02:00
parent 08ecefe309
commit 9404831069
2 changed files with 126 additions and 25 deletions

View File

@@ -136,21 +136,7 @@ def read_history(history_path: str | Path) -> list[EvaluationSnapshot]:
def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
text = path.read_text(encoding="utf-8")
if not text.startswith(f"{FRONTMATTER_MARKER}\n"):
raise InfospaceError(
"invalid_evaluation_file",
f"Missing YAML frontmatter in evaluation file: {path}",
{"path": str(path)},
)
end = text.find(f"\n{FRONTMATTER_MARKER}\n", len(FRONTMATTER_MARKER) + 1)
if end == -1:
raise InfospaceError(
"invalid_evaluation_file",
f"Unclosed YAML frontmatter in evaluation file: {path}",
{"path": str(path)},
)
raw = text[len(FRONTMATTER_MARKER) + 1 : end]
body = text[end + len(FRONTMATTER_MARKER) + 2 :]
raw, body = _extract_frontmatter_block(text, path)
data = yaml.safe_load(raw)
if not isinstance(data, dict):
raise InfospaceError(
@@ -158,9 +144,105 @@ def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
f"Expected mapping frontmatter in evaluation file: {path}",
{"path": str(path)},
)
_normalise_scores(data)
return data, body
def _normalise_scores(data: dict[str, Any]) -> None:
"""Normalise score shapes emitted by various LLMs into the canonical
list-of-{name, value} form the rest of the pipeline expects.
Handles three variants beyond the canonical:
- mapping form: ``scores: {groundedness: 5, lesson_clarity: 4}``
- list of single-key dicts: ``[{groundedness: 4}, {lesson_clarity: 3}]``
- list of canonical dicts (left as-is)
"""
scores = data.get("scores")
if isinstance(scores, dict):
data["scores"] = [
{"name": str(name), "value": _coerce_score(value)}
for name, value in scores.items()
]
elif isinstance(scores, list):
normalised: list[dict[str, Any]] = []
for item in scores:
if not isinstance(item, dict):
continue
if "name" in item and "value" in item:
normalised.append(item)
elif len(item) == 1:
(name, value), = item.items()
normalised.append({"name": str(name), "value": _coerce_score(value)})
else:
normalised.append(item)
data["scores"] = normalised
def _coerce_score(value: Any) -> float:
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _extract_frontmatter_block(text: str, path: Path) -> tuple[str, str]:
    """Split an evaluation file into its raw YAML frontmatter and its body.

    Leading newlines are ignored. Three layouts are recognised:

    - the canonical ``---``-delimited block at the start of the file
    - a ```yaml / ```yml code fence at the start of the file
    - an outer ```markdown / ```md fence wrapping ``---`` frontmatter

    Args:
        text: Full text of the evaluation file.
        path: Source path; used only in error messages and error details.

    Returns:
        ``(raw, body)`` where ``raw`` is the unparsed YAML text and ``body``
        is whatever follows the frontmatter.

    Raises:
        InfospaceError: with code ``invalid_evaluation_file`` when no
            frontmatter is found, or when a frontmatter block or YAML code
            fence is opened but never closed.
    """
    stripped_text = text.lstrip("\n")

    # Strip an outer ```markdown / ```md fence if present and recurse on its
    # contents so any ``---`` frontmatter inside still gets recognised.
    # Anything after the last closing fence is not part of the recursion.
    for outer_marker in ("```markdown\n", "```md\n"):
        if not stripped_text.startswith(outer_marker):
            continue
        inner_start = len(outer_marker)
        closing_idx = stripped_text.rfind("```")
        if closing_idx <= inner_start:
            # No separate closing fence: fall through to the checks below.
            break
        inner = stripped_text[inner_start:closing_idx].rstrip()
        return _extract_frontmatter_block(inner, path)

    marker_len = len(FRONTMATTER_MARKER)
    if stripped_text.startswith(f"{FRONTMATTER_MARKER}\n"):
        closing = f"\n{FRONTMATTER_MARKER}"
        # Search from the newline that ends the opening marker so an empty
        # block ("---\n---\n") is recognised as closed rather than unclosed.
        end = stripped_text.find(f"{closing}\n", marker_len)
        if end != -1:
            raw = stripped_text[marker_len + 1 : end]
            body = stripped_text[end + marker_len + 2 :]
            return raw, body
        # Also accept a closing marker at EOF without a trailing newline.
        # It must sit on its own line; requiring the preceding newline also
        # keeps the opening marker from being mistaken for the closing one.
        trimmed = stripped_text.rstrip()
        if trimmed.endswith(closing):
            end = len(trimmed) - len(closing)
            # Only whitespace follows the closing marker, so the body is empty.
            return stripped_text[marker_len + 1 : end], ""
        raise InfospaceError(
            "invalid_evaluation_file",
            f"Unclosed YAML frontmatter in evaluation file: {path}",
            {"path": str(path)},
        )

    if stripped_text.startswith(("```yaml", "```yml")):
        # The YAML starts on the line after the opening fence.
        content_start = stripped_text.find("\n") + 1
        fence_end = stripped_text.find("\n```", content_start)
        if fence_end == -1:
            raise InfospaceError(
                "invalid_evaluation_file",
                f"Unclosed YAML code fence in evaluation file: {path}",
                {"path": str(path)},
            )
        raw = stripped_text[content_start:fence_end]
        body = stripped_text[fence_end + len("\n```") :]
        return raw, body.lstrip("\n")

    raise InfospaceError(
        "invalid_evaluation_file",
        f"Missing YAML frontmatter in evaluation file: {path}",
        {"path": str(path)},
    )
def _parse_rationales(body: str) -> dict[str, str]:
rationales: dict[str, str] = {}
current_name: str | None = None

View File

@@ -219,18 +219,37 @@ def _read_yaml(path: Path) -> dict[str, Any]:
def _relative_to_root(root: Path, path: Path | str) -> str:
"""Return ``path`` relative to ``root``, accepting either call shape.
Callers pass either a fully-resolved ``root / sub`` style path or a
bare ``sub`` path that should be interpreted relative to ``root``.
With a relative ``root`` the old single-interpretation logic produced
a doubled path (e.g. ``infospaces/foo/infospaces/foo/...``) because it
re-prepended ``root`` to a path that was already under ``root`` when
resolved from CWD. The fix tries the CWD interpretation first and only
falls back to root-prefixing when the CWD interpretation doesn't land
under ``root``.
"""
raw = Path(path)
target = raw if raw.is_absolute() else root / raw
root_resolved = root.resolve()
target_resolved = target.resolve()
try:
return str(target_resolved.relative_to(root_resolved))
except ValueError as exc:
raise InfospaceError(
"artifact_path_escapes_infospace",
f"Artifact path escapes infospace root: {path}",
{"root": str(root), "path": str(path)},
) from exc
if raw.is_absolute():
candidates = [raw.resolve()]
else:
cwd_candidate = raw.resolve()
joined_candidate = (root / raw).resolve()
candidates = [cwd_candidate]
if joined_candidate != cwd_candidate:
candidates.append(joined_candidate)
for candidate in candidates:
try:
return str(candidate.relative_to(root_resolved))
except ValueError:
continue
raise InfospaceError(
"artifact_path_escapes_infospace",
f"Artifact path escapes infospace root: {path}",
{"root": str(root), "path": str(path)},
)
def _write_yaml(path: Path, data: dict[str, Any]) -> None: