eval history and metrics

This commit is contained in:
2026-05-14 15:35:04 +02:00
parent d0c1f82863
commit 7f54dec585
9 changed files with 870 additions and 16 deletions

View File

@@ -5,7 +5,9 @@ import json
import sys
from pathlib import Path
from .checks import run_collection_checks
from .errors import InfospaceError
from .history import find_snapshot, get_history, metric_trend, record_check_results
from .lifecycle import add_artifact, create_infospace, load_infospace
from .markdown_adapter import validate_infospace_artifacts
from .semantics import list_entities, list_relations
@@ -42,6 +44,24 @@ def build_parser() -> argparse.ArgumentParser:
relations = sub.add_parser("relations", help="List parsed relation artifacts")
relations.add_argument("root")
history = sub.add_parser("history", help="List evaluation snapshot history")
history.add_argument("root")
history.add_argument("--metric", default="")
history_diff = sub.add_parser(
"history-diff",
help="Diff two evaluation snapshots by snapshot ID or date",
)
history_diff.add_argument("root")
history_diff.add_argument("before")
history_diff.add_argument("after")
metrics = sub.add_parser(
"metrics",
help="Run collection checks and persist metrics/history",
)
metrics.add_argument("root")
return parser
@@ -96,6 +116,40 @@ def main(argv: list[str] | None = None) -> int:
]
}
)
elif args.command == "history":
history = get_history(Path(args.root))
if args.metric:
_write_json(
{
"metric": args.metric,
"trend": metric_trend(history, args.metric),
}
)
else:
_write_json({"history": [item.to_dict() for item in history]})
elif args.command == "history-diff":
history = get_history(Path(args.root))
before = find_snapshot(history, args.before)
after = find_snapshot(history, args.after)
if before is None or after is None:
missing = []
if before is None:
missing.append(args.before)
if after is None:
missing.append(args.after)
raise InfospaceError(
"missing_snapshot",
"Could not resolve requested snapshot reference",
{"missing_refs": missing},
)
_write_json({"diff": before.diff(after).to_dict()})
elif args.command == "metrics":
infospace = load_infospace(Path(args.root))
result = record_check_results(
infospace.root,
run_collection_checks(infospace.artifacts),
)
_write_json(result.to_dict())
else:
parser.error(f"Unhandled command: {args.command}")
except InfospaceError as exc: