generated from coulomb/repo-seed
fix(lifecycle): _relative_to_root path doubling with relative workspace
fix(evaluation_io): tolerate code-fenced frontmatter and varied score
shapes from small LLMs
Two bugs surfaced while running the first live Lefevre chapter-I smoke
test against openai/gpt-4o-mini.
1. _relative_to_root doubled artifact paths when --workspace was a
relative path (e.g. "."). The function received an already-CWD-
relative path like infospaces/foo/artifacts/sources/x.md and
re-prepended root, producing infospaces/foo/infospaces/foo/...
stored in artifacts/index.yaml — which then failed file reads on
the subsequent workflow stage. Fix: when raw is relative, try
CWD-relative resolution first (matches root / sub call shapes);
fall back to root-prefixing only when the CWD interpretation does
not land under root (matches bare relative-subpath call shapes
from rendered template outputs).
2. _read_frontmatter_markdown only accepted a literal ---/---
delimited block at the start of the file. gpt-4o-mini emitted four
other shapes across the seven evaluation files this chapter
produced:
- ```yaml ... ``` fence (no --- delimiters)
- ```markdown ... ``` outer fence wrapping --- frontmatter
- scores as mapping ({groundedness: 4, ...}) instead of the
canonical list of {name, value} dicts
- scores as list of single-key dicts ([{groundedness: 4}, ...])
Fix: _extract_frontmatter_block tolerates ```yaml fences and strips
```markdown outer fences; _normalise_scores rewrites mapping- and
single-key-dict shapes into the canonical form so ScoreEntry.from_dict
keeps working.
Both fixes are pure-Python; no API changes. 179 tests pass, 2 skipped.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -136,21 +136,7 @@ def read_history(history_path: str | Path) -> list[EvaluationSnapshot]:
|
|||||||
|
|
||||||
def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
|
def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
|
||||||
text = path.read_text(encoding="utf-8")
|
text = path.read_text(encoding="utf-8")
|
||||||
if not text.startswith(f"{FRONTMATTER_MARKER}\n"):
|
raw, body = _extract_frontmatter_block(text, path)
|
||||||
raise InfospaceError(
|
|
||||||
"invalid_evaluation_file",
|
|
||||||
f"Missing YAML frontmatter in evaluation file: {path}",
|
|
||||||
{"path": str(path)},
|
|
||||||
)
|
|
||||||
end = text.find(f"\n{FRONTMATTER_MARKER}\n", len(FRONTMATTER_MARKER) + 1)
|
|
||||||
if end == -1:
|
|
||||||
raise InfospaceError(
|
|
||||||
"invalid_evaluation_file",
|
|
||||||
f"Unclosed YAML frontmatter in evaluation file: {path}",
|
|
||||||
{"path": str(path)},
|
|
||||||
)
|
|
||||||
raw = text[len(FRONTMATTER_MARKER) + 1 : end]
|
|
||||||
body = text[end + len(FRONTMATTER_MARKER) + 2 :]
|
|
||||||
data = yaml.safe_load(raw)
|
data = yaml.safe_load(raw)
|
||||||
if not isinstance(data, dict):
|
if not isinstance(data, dict):
|
||||||
raise InfospaceError(
|
raise InfospaceError(
|
||||||
@@ -158,9 +144,105 @@ def _read_frontmatter_markdown(path: Path) -> tuple[dict[str, Any], str]:
|
|||||||
f"Expected mapping frontmatter in evaluation file: {path}",
|
f"Expected mapping frontmatter in evaluation file: {path}",
|
||||||
{"path": str(path)},
|
{"path": str(path)},
|
||||||
)
|
)
|
||||||
|
_normalise_scores(data)
|
||||||
return data, body
|
return data, body
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_scores(data: dict[str, Any]) -> None:
|
||||||
|
"""Normalise score shapes emitted by various LLMs into the canonical
|
||||||
|
list-of-{name, value} form the rest of the pipeline expects.
|
||||||
|
|
||||||
|
Handles three variants beyond the canonical:
|
||||||
|
|
||||||
|
- mapping form: ``scores: {groundedness: 5, lesson_clarity: 4}``
|
||||||
|
- list of single-key dicts: ``[{groundedness: 4}, {lesson_clarity: 3}]``
|
||||||
|
- list of canonical dicts (left as-is)
|
||||||
|
"""
|
||||||
|
scores = data.get("scores")
|
||||||
|
if isinstance(scores, dict):
|
||||||
|
data["scores"] = [
|
||||||
|
{"name": str(name), "value": _coerce_score(value)}
|
||||||
|
for name, value in scores.items()
|
||||||
|
]
|
||||||
|
elif isinstance(scores, list):
|
||||||
|
normalised: list[dict[str, Any]] = []
|
||||||
|
for item in scores:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
if "name" in item and "value" in item:
|
||||||
|
normalised.append(item)
|
||||||
|
elif len(item) == 1:
|
||||||
|
(name, value), = item.items()
|
||||||
|
normalised.append({"name": str(name), "value": _coerce_score(value)})
|
||||||
|
else:
|
||||||
|
normalised.append(item)
|
||||||
|
data["scores"] = normalised
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_score(value: Any) -> float:
|
||||||
|
try:
|
||||||
|
return float(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_frontmatter_block(text: str, path: Path) -> tuple[str, str]:
    """Split an evaluation file into its raw YAML frontmatter and its body.

    Leading newlines are ignored. Three layouts are recognised, checked in
    this order:

    - an outer ```markdown / ```md code fence: the text between the opening
      fence line and the last ``` in the file is extracted and parsed again
      by a recursive call (anything after that last fence is discarded);
    - the canonical block delimited by ``FRONTMATTER_MARKER`` lines at the
      start of the file; the closing marker may be the final line of the
      file with no trailing newline;
    - a ```yaml / ```yml code fence at the start of the file.

    Args:
        text: Full contents of the evaluation file.
        path: Location of the file; used only in error messages.

    Returns:
        ``(raw, body)``: the unparsed YAML text and the remaining content.

    Raises:
        InfospaceError: ``invalid_evaluation_file`` when no frontmatter is
            found or the block / code fence is never closed.
    """
    stripped_text = text.lstrip("\n")

    # Strip an outer ```markdown / ```md fence if present and recurse on its
    # body so any marker-delimited frontmatter inside still gets recognised.
    for outer_marker in ("```markdown\n", "```md\n"):
        if stripped_text.startswith(outer_marker):
            inner_start = len(outer_marker)
            closing_idx = stripped_text.rfind("```")
            if closing_idx <= inner_start:
                # No closing fence after the opening line; fall through to
                # the remaining checks below.
                break
            inner = stripped_text[inner_start:closing_idx].rstrip()
            return _extract_frontmatter_block(inner, path)

    if stripped_text.startswith(f"{FRONTMATTER_MARKER}\n"):
        text = stripped_text
        end = text.find(f"\n{FRONTMATTER_MARKER}\n", len(FRONTMATTER_MARKER) + 1)
        if end == -1:
            # Also accept a closing marker at EOF without a trailing newline.
            # It must sit on a line of its own: a bare "ends with the marker"
            # test would treat the opening marker as its own closer and
            # would chop the last character off a value that merely ends in
            # the marker text.
            trimmed = text.rstrip()
            closing = f"\n{FRONTMATTER_MARKER}"
            if not trimmed.endswith(closing):
                raise InfospaceError(
                    "invalid_evaluation_file",
                    f"Unclosed YAML frontmatter in evaluation file: {path}",
                    {"path": str(path)},
                )
            # Index of the newline that precedes the closing marker.
            end = len(trimmed) - len(closing)
        raw = text[len(FRONTMATTER_MARKER) + 1 : end]
        body = text[end + len(FRONTMATTER_MARKER) + 2 :]
        return raw, body

    if stripped_text.startswith(("```yaml", "```yml")):
        # YAML content begins on the line after the opening fence line.
        content_start = stripped_text.find("\n") + 1
        fence_end = stripped_text.find("\n```", content_start)
        if fence_end == -1:
            raise InfospaceError(
                "invalid_evaluation_file",
                f"Unclosed YAML code fence in evaluation file: {path}",
                {"path": str(path)},
            )
        raw = stripped_text[content_start:fence_end]
        body = stripped_text[fence_end + len("\n```") :]
        return raw, body.lstrip("\n")

    raise InfospaceError(
        "invalid_evaluation_file",
        f"Missing YAML frontmatter in evaluation file: {path}",
        {"path": str(path)},
    )
|
||||||
|
|
||||||
|
|
||||||
def _parse_rationales(body: str) -> dict[str, str]:
|
def _parse_rationales(body: str) -> dict[str, str]:
|
||||||
rationales: dict[str, str] = {}
|
rationales: dict[str, str] = {}
|
||||||
current_name: str | None = None
|
current_name: str | None = None
|
||||||
|
|||||||
@@ -219,18 +219,37 @@ def _read_yaml(path: Path) -> dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def _relative_to_root(root: Path, path: Path | str) -> str:
|
def _relative_to_root(root: Path, path: Path | str) -> str:
|
||||||
|
"""Return ``path`` relative to ``root``, accepting either call shape.
|
||||||
|
|
||||||
|
Callers pass either a fully-resolved ``root / sub`` style path or a
|
||||||
|
bare ``sub`` path that should be interpreted relative to ``root``.
|
||||||
|
With a relative ``root`` the old single-interpretation logic produced
|
||||||
|
a doubled path (e.g. ``infospaces/foo/infospaces/foo/...``) because it
|
||||||
|
re-prepended ``root`` to a path that was already under ``root`` when
|
||||||
|
resolved from CWD. The fix tries the CWD interpretation first and only
|
||||||
|
falls back to root-prefixing when the CWD interpretation doesn't land
|
||||||
|
under ``root``.
|
||||||
|
"""
|
||||||
raw = Path(path)
|
raw = Path(path)
|
||||||
target = raw if raw.is_absolute() else root / raw
|
|
||||||
root_resolved = root.resolve()
|
root_resolved = root.resolve()
|
||||||
target_resolved = target.resolve()
|
if raw.is_absolute():
|
||||||
try:
|
candidates = [raw.resolve()]
|
||||||
return str(target_resolved.relative_to(root_resolved))
|
else:
|
||||||
except ValueError as exc:
|
cwd_candidate = raw.resolve()
|
||||||
raise InfospaceError(
|
joined_candidate = (root / raw).resolve()
|
||||||
"artifact_path_escapes_infospace",
|
candidates = [cwd_candidate]
|
||||||
f"Artifact path escapes infospace root: {path}",
|
if joined_candidate != cwd_candidate:
|
||||||
{"root": str(root), "path": str(path)},
|
candidates.append(joined_candidate)
|
||||||
) from exc
|
for candidate in candidates:
|
||||||
|
try:
|
||||||
|
return str(candidate.relative_to(root_resolved))
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
raise InfospaceError(
|
||||||
|
"artifact_path_escapes_infospace",
|
||||||
|
f"Artifact path escapes infospace root: {path}",
|
||||||
|
{"root": str(root), "path": str(path)},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _write_yaml(path: Path, data: dict[str, Any]) -> None:
|
def _write_yaml(path: Path, data: dict[str, Any]) -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user