generated from coulomb/repo-seed
fix(lifecycle): _relative_to_root path doubling with relative workspace
fix(evaluation_io): tolerate code-fenced frontmatter and varied score
shapes from small LLMs
Two bugs surfaced while running the first live Lefevre chapter-I smoke
test against openai/gpt-4o-mini.
1. _relative_to_root doubled artifact paths when --workspace was a
relative path (e.g. "."). The function received an already-CWD-
relative path like infospaces/foo/artifacts/sources/x.md and
re-prepended root, producing infospaces/foo/infospaces/foo/...
stored in artifacts/index.yaml — which then failed file reads on
the subsequent workflow stage. Fix: when raw is relative, try
CWD-relative resolution first (matches root / sub call shapes);
fall back to root-prefixing only when the CWD interpretation does
not land under root (matches bare relative-subpath call shapes
from rendered template outputs).
2. _read_frontmatter_markdown only accepted a literal ---/---
delimited block at the start of the file. gpt-4o-mini emitted four
other shapes across the seven evaluation files this chapter
produced (two fence variants and two score layouts):
- ```yaml ... ``` fence (no --- delimiters)
- ```markdown ... ``` outer fence wrapping --- frontmatter
- scores as mapping ({groundedness: 4, ...}) instead of the
canonical list of {name, value} dicts
- scores as list of single-key dicts ([{groundedness: 4}, ...])
Fix: _extract_frontmatter_block tolerates ```yaml fences and strips
```markdown outer fences; _normalise_scores rewrites mapping- and
single-key-dict shapes into the canonical form so ScoreEntry.from_dict
keeps working.
Both fixes are pure-Python; no API changes. 179 tests pass, 2 skipped.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -136,21 +136,7 @@ def read_history(history_path: str | Path) -> list[EvaluationSnapshot]:
|
||||
|
||||
def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
|
||||
text = path.read_text(encoding="utf-8")
|
||||
if not text.startswith(f"{FRONTMATTER_MARKER}\n"):
|
||||
raise InfospaceError(
|
||||
"invalid_evaluation_file",
|
||||
f"Missing YAML frontmatter in evaluation file: {path}",
|
||||
{"path": str(path)},
|
||||
)
|
||||
end = text.find(f"\n{FRONTMATTER_MARKER}\n", len(FRONTMATTER_MARKER) + 1)
|
||||
if end == -1:
|
||||
raise InfospaceError(
|
||||
"invalid_evaluation_file",
|
||||
f"Unclosed YAML frontmatter in evaluation file: {path}",
|
||||
{"path": str(path)},
|
||||
)
|
||||
raw = text[len(FRONTMATTER_MARKER) + 1 : end]
|
||||
body = text[end + len(FRONTMATTER_MARKER) + 2 :]
|
||||
raw, body = _extract_frontmatter_block(text, path)
|
||||
data = yaml.safe_load(raw)
|
||||
if not isinstance(data, dict):
|
||||
raise InfospaceError(
|
||||
@@ -158,9 +144,105 @@ def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
|
||||
f"Expected mapping frontmatter in evaluation file: {path}",
|
||||
{"path": str(path)},
|
||||
)
|
||||
_normalise_scores(data)
|
||||
return data, body
|
||||
|
||||
|
||||
def _normalise_scores(data: dict[str, Any]) -> None:
    """Rewrite ``data["scores"]`` in place into the canonical list form.

    The canonical shape is a list of ``{"name": ..., "value": ...}`` dicts.
    Two alternative shapes emitted by LLMs are converted to it:

    - mapping form: ``scores: {groundedness: 5, lesson_clarity: 4}``
    - list of single-key dicts: ``[{groundedness: 4}, {lesson_clarity: 3}]``

    List entries that are already canonical, or that are dicts of any other
    shape, are kept unchanged. List entries that are not dicts are dropped.
    When ``scores`` is missing or is neither a mapping nor a list, ``data``
    is left untouched.

    Args:
        data: Parsed frontmatter mapping; mutated in place.
    """
    scores = data.get("scores")

    if isinstance(scores, dict):
        data["scores"] = [
            {"name": str(name), "value": _coerce_score(value)}
            for name, value in scores.items()
        ]
        return

    if not isinstance(scores, list):
        return

    normalised: list[dict[str, Any]] = []
    for item in scores:
        if not isinstance(item, dict):
            # Best-effort tolerance: non-dict entries cannot be interpreted
            # as a score, so they are skipped rather than failing the file.
            continue
        is_metric_pair = (
            len(item) == 1 and "name" not in item and "value" not in item
        )
        if is_metric_pair:
            ((name, value),) = item.items()
            normalised.append({"name": str(name), "value": _coerce_score(value)})
        else:
            # Canonical entries pass through as-is. So do half-specified
            # ones such as {"name": "x"}: treating their lone key as a
            # metric name would fabricate a score called "name" or "value".
            normalised.append(item)
    data["scores"] = normalised
|
||||
|
||||
|
||||
def _coerce_score(value: Any) -> float:
    """Convert a raw score value to ``float``, falling back to ``0.0``.

    Args:
        value: Score as it appeared in the parsed frontmatter (number,
            numeric string, or anything else).

    Returns:
        ``float(value)`` when the conversion succeeds, otherwise ``0.0``.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (None, list, ...).
        # ValueError: non-numeric string.
        # OverflowError: an int too large to be represented as a float.
        return 0.0
|
||||
|
||||
|
||||
def _extract_frontmatter_block(text: str, path: Path) -> tuple[str, str]:
    """Split an evaluation file into its YAML frontmatter and its body.

    Leading blank lines are ignored. Three layouts are recognised:

    - the canonical ``---``-delimited block at the start of the file;
    - a fenced ``yaml`` / ``yml`` code block at the start of the file;
    - an outer ``markdown`` / ``md`` code fence wrapping either of the above
      (the fence is stripped and its content is processed recursively).

    Args:
        text: Full contents of the evaluation file.
        path: Location of the file; used only in error messages and payloads.

    Returns:
        A ``(raw_yaml, body)`` tuple: the unparsed frontmatter text and the
        text that follows the closing delimiter.

    Raises:
        InfospaceError: ``invalid_evaluation_file`` when no frontmatter is
            found, or when a frontmatter block or YAML fence is never closed.
    """
    stripped_text = text.lstrip("\n")

    # Strip an outer markdown fence, if present, and recurse on its content
    # so that frontmatter inside the fence is still recognised.
    for outer_marker in ("```markdown\n", "```md\n"):
        if not stripped_text.startswith(outer_marker):
            continue
        closing_idx = stripped_text.rfind("```")
        if closing_idx > len(outer_marker):
            inner = stripped_text[len(outer_marker) : closing_idx].rstrip()
            return _extract_frontmatter_block(inner, path)
        # No closing fence after the opening one: fall through to the
        # "missing frontmatter" error at the bottom.
        break

    if stripped_text.startswith(("```yaml", "```yml")):
        info_end = stripped_text.find("\n")
        # Search from the newline that ends the info line (not from the
        # first content character) so an empty fence is seen as closed.
        fence_end = -1 if info_end == -1 else stripped_text.find("\n```", info_end)
        if fence_end == -1:
            raise InfospaceError(
                "invalid_evaluation_file",
                f"Unclosed YAML code fence in evaluation file: {path}",
                {"path": str(path)},
            )
        raw = stripped_text[info_end + 1 : fence_end]
        body = stripped_text[fence_end + len("\n```") :]
        return raw, body.lstrip("\n")

    opener = f"{FRONTMATTER_MARKER}\n"
    if stripped_text.startswith(opener):
        closer = f"\n{FRONTMATTER_MARKER}"
        # Start at the newline that ends the opening marker so that an empty
        # block (opening marker immediately followed by the closing one) is
        # found as well.
        end = stripped_text.find(f"{closer}\n", len(FRONTMATTER_MARKER))
        if end == -1:
            # Also accept a closing marker at EOF without a trailing newline,
            # but only when it sits on a line of its own; a value that merely
            # ends with the marker characters does not close the block.
            trimmed = stripped_text.rstrip()
            candidate = len(trimmed) - len(closer)
            if trimmed.endswith(closer) and candidate >= len(FRONTMATTER_MARKER):
                end = candidate
            else:
                raise InfospaceError(
                    "invalid_evaluation_file",
                    f"Unclosed YAML frontmatter in evaluation file: {path}",
                    {"path": str(path)},
                )
        raw = stripped_text[len(opener) : end]
        body = stripped_text[end + len(closer) + 1 :]
        return raw, body

    raise InfospaceError(
        "invalid_evaluation_file",
        f"Missing YAML frontmatter in evaluation file: {path}",
        {"path": str(path)},
    )
|
||||
|
||||
|
||||
def _parse_rationales(body: str) -> dict[str, str]:
|
||||
rationales: dict[str, str] = {}
|
||||
current_name: str | None = None
|
||||
|
||||
@@ -219,18 +219,37 @@ def _read_yaml(path: Path) -> dict[str, Any]:
|
||||
|
||||
|
||||
def _relative_to_root(root: Path, path: Path | str) -> str:
|
||||
"""Return ``path`` relative to ``root``, accepting either call shape.
|
||||
|
||||
Callers pass either a fully-resolved ``root / sub`` style path or a
|
||||
bare ``sub`` path that should be interpreted relative to ``root``.
|
||||
With a relative ``root`` the old single-interpretation logic produced
|
||||
a doubled path (e.g. ``infospaces/foo/infospaces/foo/...``) because it
|
||||
re-prepended ``root`` to a path that was already under ``root`` when
|
||||
resolved from CWD. The fix tries the CWD interpretation first and only
|
||||
falls back to root-prefixing when the CWD interpretation doesn't land
|
||||
under ``root``.
|
||||
"""
|
||||
raw = Path(path)
|
||||
target = raw if raw.is_absolute() else root / raw
|
||||
root_resolved = root.resolve()
|
||||
target_resolved = target.resolve()
|
||||
try:
|
||||
return str(target_resolved.relative_to(root_resolved))
|
||||
except ValueError as exc:
|
||||
raise InfospaceError(
|
||||
"artifact_path_escapes_infospace",
|
||||
f"Artifact path escapes infospace root: {path}",
|
||||
{"root": str(root), "path": str(path)},
|
||||
) from exc
|
||||
if raw.is_absolute():
|
||||
candidates = [raw.resolve()]
|
||||
else:
|
||||
cwd_candidate = raw.resolve()
|
||||
joined_candidate = (root / raw).resolve()
|
||||
candidates = [cwd_candidate]
|
||||
if joined_candidate != cwd_candidate:
|
||||
candidates.append(joined_candidate)
|
||||
for candidate in candidates:
|
||||
try:
|
||||
return str(candidate.relative_to(root_resolved))
|
||||
except ValueError:
|
||||
continue
|
||||
raise InfospaceError(
|
||||
"artifact_path_escapes_infospace",
|
||||
f"Artifact path escapes infospace root: {path}",
|
||||
{"root": str(root), "path": str(path)},
|
||||
)
|
||||
|
||||
|
||||
def _write_yaml(path: Path, data: dict[str, Any]) -> None:
|
||||
|
||||
Reference in New Issue
Block a user