generated from coulomb/repo-seed
Add self-scoping review UI
This commit is contained in:
@@ -13,6 +13,8 @@ instead of relying on memory or screenshots.
|
||||
repo-scoping capability truth.
|
||||
- `workflow.md` explains how to run challenger assessments, interpret outcomes,
|
||||
and decide whether to update the golden profile or fix the engine.
|
||||
- `outcomes/` stores append-only reviewer decisions created from side-by-side
|
||||
comparisons.
|
||||
- `../schemas/self-scoping-assessment.schema.json` defines the immutable
|
||||
assessment-run artifact shape.
|
||||
|
||||
@@ -35,6 +37,12 @@ assessment. Compare the challenger to the golden profile and to the negative
|
||||
seed. Reviewers should be able to choose whether the old result, new result, or
|
||||
neither is better, then store that judgement as a new assessment outcome.
|
||||
|
||||
The curator UI exposes this loop at `/ui/self-scoping`. It reads the golden and
|
||||
assessment JSON files from this directory, highlights missing, forbidden, and
|
||||
misplaced hierarchy entries, and records reviewer preference without mutating
|
||||
the compared artifacts. The same page can compare two assessment runs directly
|
||||
so reviewers can choose whether the old baseline or new challenger is better.
|
||||
|
||||
## Export Command
|
||||
|
||||
Export a completed analysis run as a challenger artifact:
|
||||
|
||||
9
docs/self-scoping/outcomes/README.md
Normal file
9
docs/self-scoping/outcomes/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
# Self-Scoping Review Outcomes
|
||||
|
||||
This directory stores append-only review decisions recorded from the
|
||||
self-scoping comparison UI. Outcome files bind a reviewer choice to a golden
|
||||
profile, an assessment artifact, and the repo-scoping engine identity captured
|
||||
in that assessment.
|
||||
|
||||
Do not edit historical assessment artifacts to record a preference. Add a new
|
||||
outcome record instead.
|
||||
@@ -56,17 +56,25 @@ are committed.
|
||||
|
||||
2. Read the comparison report.
|
||||
|
||||
3. If the report says `regression`, inspect forbidden capabilities, misplaced
|
||||
3. Open the curator UI at `/ui/self-scoping` to compare the golden profile and
|
||||
assessment artifact side by side.
|
||||
|
||||
4. When an earlier baseline assessment exists, use the same page's two-run
|
||||
comparison to judge old output against the new challenger.
|
||||
|
||||
5. If the report says `regression`, inspect forbidden capabilities, misplaced
|
||||
features, and known regression patterns first.
|
||||
|
||||
4. If the report says `needs_review`, inspect missing expected capabilities and
|
||||
6. If the report says `needs_review`, inspect missing expected capabilities and
|
||||
source evidence before choosing old or new output.
|
||||
|
||||
5. If the report says `candidate_improvement`, still confirm that the
|
||||
7. If the report says `candidate_improvement`, still confirm that the
|
||||
hierarchy, source refs, and native-utility boundaries make sense.
|
||||
|
||||
6. Record the decision as an assessment outcome before changing the active
|
||||
baseline.
|
||||
8. Record the decision as an assessment outcome before changing the active
|
||||
baseline. The UI writes append-only outcome records under
|
||||
`docs/self-scoping/outcomes/`; it does not rewrite historical assessment or
|
||||
golden-profile artifacts.
|
||||
|
||||
## CI Use
|
||||
|
||||
|
||||
@@ -1,4 +1,13 @@
|
||||
from repo_registry.self_scoping.assessment import export_assessment_artifact
|
||||
from repo_registry.self_scoping.comparison import compare_assessment_to_golden
|
||||
from repo_registry.self_scoping.review_store import (
|
||||
record_assessment_outcome,
|
||||
record_assessment_pair_outcome,
|
||||
)
|
||||
|
||||
__all__ = ["compare_assessment_to_golden", "export_assessment_artifact"]
|
||||
__all__ = [
|
||||
"compare_assessment_to_golden",
|
||||
"export_assessment_artifact",
|
||||
"record_assessment_outcome",
|
||||
"record_assessment_pair_outcome",
|
||||
]
|
||||
|
||||
217
src/repo_registry/self_scoping/review_store.py
Normal file
217
src/repo_registry/self_scoping/review_store.py
Normal file
@@ -0,0 +1,217 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
# Name of the environment variable consulted by self_scoping_root() when no
# explicit root is passed.
SELF_SCOPING_ROOT_ENV = "REPO_REGISTRY_SELF_SCOPING_ROOT"
# Schema identifier stamped into the "schema_version" field of every outcome
# record built in this module.
OUTCOME_SCHEMA_VERSION = "self-scoping-review-outcome/v1"
# Every decision value the record_* functions accept; any other value makes
# them raise ValueError before anything is read or written.
ALLOWED_OUTCOMES = {
    "prefer_golden",
    "prefer_assessment",
    "prefer_baseline",
    "prefer_challenger",
    "tie",
    "needs_human",
    "reject_assessment",
    "reject_challenger",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ReviewArtifact:
    """Immutable summary of one JSON artifact found under the self-scoping root."""

    # Location of the JSON file relative to the self-scoping root, POSIX-style.
    path: str
    # Payload "artifact_id" or "profile_id", falling back to the file stem.
    artifact_id: str
    # Display title taken from the payload, falling back to the file stem.
    title: str
    # Payload "updated_at" or "created_at"; empty string when neither is set.
    updated_at: str
|
||||
|
||||
|
||||
def self_scoping_root(root: str | Path | None = None) -> Path:
    """Return the resolved directory that holds the self-scoping artifacts.

    Precedence: a truthy ``root`` argument, then a non-empty value of the
    environment variable named by ``SELF_SCOPING_ROOT_ENV``, then the relative
    default ``docs/self-scoping``.
    """
    if root:
        return Path(root).resolve()
    from_environment = os.environ.get(SELF_SCOPING_ROOT_ENV)
    if from_environment:
        return Path(from_environment).resolve()
    return Path("docs/self-scoping").resolve()
|
||||
|
||||
|
||||
def list_golden_profiles(root: str | Path | None = None) -> list[ReviewArtifact]:
    """List the golden profiles found in the ``golden`` subdirectory of the root."""
    golden_profiles = _list_artifacts("golden", root=root)
    return golden_profiles
|
||||
|
||||
|
||||
def list_assessment_artifacts(root: str | Path | None = None) -> list[ReviewArtifact]:
    """List the assessment runs found in the ``assessments`` subdirectory of the root."""
    assessment_runs = _list_artifacts("assessments", root=root)
    return assessment_runs
|
||||
|
||||
|
||||
def load_json_artifact(
    relative_path: str,
    root: str | Path | None = None,
) -> dict[str, Any]:
    """Load one JSON artifact from inside the self-scoping root.

    Args:
        relative_path: Path of the artifact relative to the self-scoping root.
        root: Optional override for the self-scoping root directory.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the path escapes the root, is not a ``.json`` path, or
            the file does not contain a JSON object.
        FileNotFoundError: If the artifact does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    artifact_path = _safe_artifact_path(relative_path, root=root)
    payload = json.loads(artifact_path.read_text(encoding="utf-8"))
    # Callers use ``.get`` on the result; reject lists and scalars here with an
    # error type they already handle instead of failing later on AttributeError.
    if not isinstance(payload, dict):
        raise ValueError(f"artifact is not a JSON object: {relative_path}")
    return payload
|
||||
|
||||
|
||||
def list_outcome_records(root: str | Path | None = None) -> list[dict[str, Any]]:
    """Return the recorded review outcomes, in reverse file-name order.

    Args:
        root: Optional override for the self-scoping root directory.

    Returns:
        One dict per readable ``outcomes/*.json`` file. The list is empty when
        the ``outcomes`` directory does not exist.
    """
    outcomes_dir = self_scoping_root(root) / "outcomes"
    if not outcomes_dir.exists():
        return []
    records: list[dict[str, Any]] = []
    for path in sorted(outcomes_dir.glob("*.json"), reverse=True):
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Best-effort listing: one unreadable file must not hide the rest.
            continue
        # Only JSON objects are outcome records; a stray list or scalar would
        # break consumers that call ``.get`` on every entry.
        if isinstance(payload, dict):
            records.append(payload)
    return records
|
||||
|
||||
|
||||
def record_assessment_outcome(
    *,
    golden_path: str,
    assessment_path: str,
    outcome: str,
    reviewer: str,
    notes: str,
    comparison_status: str,
    root: str | Path | None = None,
) -> dict[str, Any]:
    """Record a reviewer decision for a golden-profile vs. assessment comparison.

    Both artifacts are loaded from the self-scoping root, so a bad path fails
    before anything is written. The new record is written under ``outcomes/``
    and also returned.

    Raises:
        ValueError: If ``outcome`` is not one of ``ALLOWED_OUTCOMES``.
    """
    if outcome not in ALLOWED_OUTCOMES:
        raise ValueError(f"unsupported review outcome: {outcome}")

    artifact_root = self_scoping_root(root)
    golden_payload = load_json_artifact(golden_path, root=artifact_root)
    assessment_payload = load_json_artifact(assessment_path, root=artifact_root)
    stamp = _created_at()

    record = {
        "schema_version": OUTCOME_SCHEMA_VERSION,
        "outcome_id": _outcome_id(stamp, assessment_path, outcome),
        "created_at": stamp,
        # A blank reviewer field falls back to the default reviewer name.
        "reviewer": reviewer.strip() or "codex",
        "outcome": outcome,
        "notes": notes.strip(),
        "comparison_status": comparison_status,
        "golden_profile_path": golden_path,
        "golden_profile_id": golden_payload.get("profile_id", ""),
        "assessment_artifact_path": assessment_path,
        "assessment_artifact_id": assessment_payload.get("artifact_id", ""),
        "engine_identity": assessment_payload.get("engine_identity", {}),
        "decision_scope": "baseline-comparison",
    }

    _write_outcome(record, artifact_root)
    return record
|
||||
|
||||
|
||||
def record_assessment_pair_outcome(
    *,
    baseline_path: str,
    challenger_path: str,
    outcome: str,
    reviewer: str,
    notes: str,
    comparison_status: str,
    root: str | Path | None = None,
) -> dict[str, Any]:
    """Record a reviewer decision for a baseline vs. challenger run comparison.

    Both assessment artifacts are loaded from the self-scoping root, so a bad
    path fails before anything is written. The new record is written under
    ``outcomes/`` and also returned.

    Raises:
        ValueError: If ``outcome`` is not one of ``ALLOWED_OUTCOMES``.
    """
    if outcome not in ALLOWED_OUTCOMES:
        raise ValueError(f"unsupported review outcome: {outcome}")

    base = self_scoping_root(root)
    baseline = load_json_artifact(baseline_path, root=base)
    challenger = load_json_artifact(challenger_path, root=base)
    created_at = _created_at()
    # _outcome_id() takes ``Path(...).stem`` of its subject. If a stem still
    # contained a dot (e.g. "run-1.1"), the joined subject would be truncated at
    # its last dot and the challenger name cut short. Replace dots per stem
    # first so the whole pair survives into the ID.
    baseline_stem = Path(baseline_path).stem.replace(".", "-")
    challenger_stem = Path(challenger_path).stem.replace(".", "-")
    outcome_id = _outcome_id(
        created_at,
        f"{baseline_stem}__{challenger_stem}",
        outcome,
    )
    record = {
        "schema_version": OUTCOME_SCHEMA_VERSION,
        "outcome_id": outcome_id,
        "created_at": created_at,
        # A blank reviewer field falls back to the default reviewer name.
        "reviewer": reviewer.strip() or "codex",
        "outcome": outcome,
        "notes": notes.strip(),
        "comparison_status": comparison_status,
        "baseline_assessment_path": baseline_path,
        "baseline_assessment_artifact_id": baseline.get("artifact_id", ""),
        "baseline_engine_identity": baseline.get("engine_identity", {}),
        "challenger_assessment_path": challenger_path,
        "challenger_assessment_artifact_id": challenger.get("artifact_id", ""),
        "challenger_engine_identity": challenger.get("engine_identity", {}),
        "decision_scope": "assessment-pair-comparison",
    }
    _write_outcome(record, base)
    return record
|
||||
|
||||
|
||||
def _created_at() -> str:
|
||||
return (
|
||||
datetime.now(UTC)
|
||||
.replace(microsecond=0)
|
||||
.isoformat()
|
||||
.replace("+00:00", "Z")
|
||||
)
|
||||
|
||||
|
||||
def _write_outcome(record: dict[str, Any], base: Path) -> None:
|
||||
outcomes_dir = base / "outcomes"
|
||||
outcomes_dir.mkdir(parents=True, exist_ok=True)
|
||||
output_path = outcomes_dir / f"{record['outcome_id']}.json"
|
||||
output_path.write_text(
|
||||
json.dumps(record, indent=2, sort_keys=True) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def _list_artifacts(kind: str, root: str | Path | None = None) -> list[ReviewArtifact]:
    """Summarise every readable JSON object in the ``kind`` subdirectory.

    Args:
        kind: Subdirectory of the self-scoping root to scan.
        root: Optional override for the self-scoping root directory.

    Returns:
        One ``ReviewArtifact`` per file, in file-name order. Files that are not
        valid UTF-8 JSON objects are skipped.
    """
    base = self_scoping_root(root)
    artifacts: list[ReviewArtifact] = []
    for path in sorted((base / kind).glob("*.json")):
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            continue
        # A top-level list or scalar has no ``.get``; skip it like malformed JSON
        # so one stray file cannot break the whole listing.
        if not isinstance(payload, dict):
            continue
        assessment = payload.get("assessment")
        # Only read the nested summary when "assessment" really is an object.
        summary = assessment.get("summary") if isinstance(assessment, dict) else None
        artifacts.append(
            ReviewArtifact(
                path=path.relative_to(base).as_posix(),
                artifact_id=str(
                    payload.get("artifact_id") or payload.get("profile_id") or path.stem
                ),
                title=str(
                    payload.get("title")
                    or summary
                    or payload.get("artifact_type")
                    or path.stem
                ),
                updated_at=str(
                    payload.get("updated_at") or payload.get("created_at") or ""
                ),
            )
        )
    return artifacts
|
||||
|
||||
|
||||
def _safe_artifact_path(relative_path: str, root: str | Path | None = None) -> Path:
    """Resolve ``relative_path`` inside the self-scoping root and validate it.

    Args:
        relative_path: Path supplied by the caller, relative to the root.
        root: Optional override for the self-scoping root directory.

    Returns:
        The resolved path of an existing ``.json`` file inside the root.

    Raises:
        ValueError: If the resolved path lies outside the root or does not end
            in ``.json``.
        FileNotFoundError: If no regular file exists at the resolved path.
    """
    base = self_scoping_root(root)
    # Resolve first so ``..`` segments and symlinks are judged by where they land.
    artifact_path = (base / relative_path).resolve()
    try:
        artifact_path.relative_to(base)
    except ValueError as exc:
        raise ValueError(f"artifact path escapes self-scoping root: {relative_path}") from exc
    if artifact_path.suffix != ".json":
        raise ValueError(f"artifact path is not JSON: {relative_path}")
    # is_file() rather than exists(): a directory named "*.json" would pass an
    # existence check and then fail at read time with an OSError.
    if not artifact_path.is_file():
        raise FileNotFoundError(relative_path)
    return artifact_path
|
||||
|
||||
|
||||
def _outcome_id(created_at: str, assessment_path: str, outcome: str) -> str:
|
||||
timestamp = (
|
||||
created_at.replace("-", "")
|
||||
.replace(":", "")
|
||||
.replace("T", "-")
|
||||
.replace("Z", "")
|
||||
)
|
||||
assessment_stem = Path(assessment_path).stem.replace(".", "-")
|
||||
return f"{timestamp}__{assessment_stem}__{outcome}__{uuid4().hex[:8]}"
|
||||
@@ -10,12 +10,36 @@ from fastapi import APIRouter, Depends, Form, HTTPException, Query
|
||||
from fastapi.responses import HTMLResponse, PlainTextResponse, RedirectResponse
|
||||
|
||||
from repo_registry.core.service import RegistryService
|
||||
from repo_registry.self_scoping.comparison import compare_assessment_to_golden
|
||||
from repo_registry.self_scoping.review_store import (
|
||||
ALLOWED_OUTCOMES,
|
||||
list_assessment_artifacts,
|
||||
list_golden_profiles,
|
||||
list_outcome_records,
|
||||
load_json_artifact,
|
||||
record_assessment_outcome,
|
||||
record_assessment_pair_outcome,
|
||||
)
|
||||
from repo_registry.storage.sqlite import NotFoundError
|
||||
from repo_registry.web_api.app import get_service
|
||||
|
||||
|
||||
router = APIRouter(include_in_schema=False)
|
||||
APP_NAME = "Repository Scoping"
|
||||
# Display labels for outcome values used when an assessment is compared with a
# golden profile; keys are the stored outcome values.
REVIEW_OUTCOME_LABELS = {
    "prefer_golden": "Prefer Golden",
    "prefer_assessment": "Prefer Assessment",
    "tie": "Tie",
    "needs_human": "Needs Human Review",
    "reject_assessment": "Reject Assessment",
}
# Display labels for outcome values used when two assessment runs (baseline and
# challenger) are compared; keys are the stored outcome values.
PAIR_REVIEW_OUTCOME_LABELS = {
    "prefer_baseline": "Prefer Baseline",
    "prefer_challenger": "Prefer Challenger",
    "tie": "Tie",
    "needs_human": "Needs Human Review",
    "reject_challenger": "Reject Challenger",
}
|
||||
|
||||
|
||||
def repository_directory_name(url: str, fallback: str) -> str:
|
||||
@@ -188,6 +212,29 @@ def page(
|
||||
}}
|
||||
.tree ul {{ margin: 8px 0 0 20px; padding: 0; }}
|
||||
.tree li {{ margin: 6px 0; }}
|
||||
.review-grid {{
|
||||
display: grid;
|
||||
grid-template-columns: minmax(0, 1fr) minmax(0, 1fr);
|
||||
gap: 18px;
|
||||
align-items: start;
|
||||
}}
|
||||
.review-item {{
|
||||
border-left: 3px solid var(--line);
|
||||
padding: 8px 0 8px 10px;
|
||||
margin: 8px 0;
|
||||
}}
|
||||
.review-item.match {{ border-color: #10b981; }}
|
||||
.review-item.problem {{ border-color: var(--danger); background: #fffafa; }}
|
||||
.review-item.warn {{ border-color: #f59e0b; background: #fffaf0; }}
|
||||
.review-item h3 {{ margin-top: 0; }}
|
||||
.review-list {{ margin: 8px 0 0 18px; padding: 0; }}
|
||||
.review-list .warn {{ color: var(--warn); font-weight: 650; }}
|
||||
.review-meta {{
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
flex-wrap: wrap;
|
||||
align-items: center;
|
||||
}}
|
||||
.source {{ color: var(--muted); font-family: ui-monospace, SFMono-Regular, Consolas, monospace; font-size: 12px; }}
|
||||
.scope-document {{
|
||||
margin: 0;
|
||||
@@ -279,6 +326,7 @@ def page(
|
||||
header {{ padding: 12px 16px; }}
|
||||
main {{ padding: 16px; }}
|
||||
.grid {{ grid-template-columns: 1fr; }}
|
||||
.review-grid {{ grid-template-columns: 1fr; }}
|
||||
.graph-shell {{ grid-template-columns: 1fr; }}
|
||||
.graph-canvas {{ min-height: 560px; }}
|
||||
table, tbody, tr, td {{ display: block; width: 100%; }}
|
||||
@@ -297,6 +345,7 @@ def page(
|
||||
<nav class="actions">
|
||||
<a href="/ui/search">Search</a>
|
||||
<a href="/ui/discovery">Discovery</a>
|
||||
<a href="/ui/self-scoping">Self-Scoping</a>
|
||||
<a href="/docs">API Docs</a>
|
||||
</nav>
|
||||
</header>
|
||||
@@ -405,6 +454,614 @@ def scope_document() -> HTMLResponse:
|
||||
return page("SCOPE.md", body)
|
||||
|
||||
|
||||
def render_self_scoping_index(
|
||||
*,
|
||||
error_message: str | None = None,
|
||||
status_code: int = 200,
|
||||
) -> HTMLResponse:
|
||||
golden_profiles = list_golden_profiles()
|
||||
assessments = list_assessment_artifacts()
|
||||
outcomes = list_outcome_records()
|
||||
error = (
|
||||
f"""
|
||||
<div class="notice error" role="alert">
|
||||
<strong>Self-scoping review failed.</strong>
|
||||
<p>{escape(error_message)}</p>
|
||||
</div>
|
||||
"""
|
||||
if error_message
|
||||
else ""
|
||||
)
|
||||
missing_inputs = ""
|
||||
if not golden_profiles or not assessments:
|
||||
missing_inputs = """
|
||||
<div class="notice warn">
|
||||
Add at least one golden profile and one assessment artifact under
|
||||
<span class="source">docs/self-scoping</span> before opening a comparison.
|
||||
</div>
|
||||
"""
|
||||
body = f"""
|
||||
<h1>Self-Scoping Review</h1>
|
||||
{error}
|
||||
{missing_inputs}
|
||||
<div class="grid">
|
||||
<section class="panel stack">
|
||||
<h2>Compare To Golden</h2>
|
||||
<form class="stack" method="get" action="/ui/self-scoping/review">
|
||||
<label>Golden profile
|
||||
<select name="golden" required>
|
||||
{_review_artifact_options(golden_profiles)}
|
||||
</select>
|
||||
</label>
|
||||
<label>Assessment artifact
|
||||
<select name="assessment" required>
|
||||
{_review_artifact_options(assessments)}
|
||||
</select>
|
||||
</label>
|
||||
<div class="actions">
|
||||
<button type="submit">Open comparison</button>
|
||||
</div>
|
||||
</form>
|
||||
<h2>Compare Two Runs</h2>
|
||||
<form class="stack" method="get" action="/ui/self-scoping/run-review">
|
||||
<label>Baseline assessment
|
||||
<select name="baseline" required>
|
||||
{_review_artifact_options(assessments)}
|
||||
</select>
|
||||
</label>
|
||||
<label>Challenger assessment
|
||||
<select name="challenger" required>
|
||||
{_review_artifact_options(assessments)}
|
||||
</select>
|
||||
</label>
|
||||
<div class="actions">
|
||||
<button type="submit">Open run comparison</button>
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
<section class="panel stack">
|
||||
<h2>Recorded Outcomes</h2>
|
||||
{_render_outcome_table(outcomes)}
|
||||
</section>
|
||||
</div>
|
||||
"""
|
||||
response = page("Self-Scoping Review", body)
|
||||
response.status_code = status_code
|
||||
return response
|
||||
|
||||
|
||||
@router.get("/ui/self-scoping")
def self_scoping_index() -> HTMLResponse:
    """Serve the self-scoping landing page with no error banner."""
    landing_page = render_self_scoping_index()
    return landing_page
|
||||
|
||||
|
||||
@router.get("/ui/self-scoping/review")
|
||||
def self_scoping_review(
|
||||
golden: str = Query(...),
|
||||
assessment: str = Query(...),
|
||||
saved: str | None = Query(default=None),
|
||||
) -> HTMLResponse:
|
||||
try:
|
||||
golden_profile = load_json_artifact(golden)
|
||||
assessment_artifact = load_json_artifact(assessment)
|
||||
except (FileNotFoundError, ValueError, json.JSONDecodeError) as exc:
|
||||
return render_self_scoping_index(error_message=str(exc), status_code=400)
|
||||
|
||||
comparison = compare_assessment_to_golden(golden_profile, assessment_artifact)
|
||||
comparison_status = comparison["status"]
|
||||
comparison_summary = comparison["summary"]
|
||||
matched_count = len(comparison["matched_expected_capabilities"])
|
||||
missing_count = len(comparison["missing_expected_capabilities"])
|
||||
forbidden_count = len(comparison["forbidden_native_capabilities_present"])
|
||||
misplaced_count = len(comparison["misplaced_features"])
|
||||
saved_notice = (
|
||||
f"""
|
||||
<div class="notice success">
|
||||
Saved assessment outcome <span class="source">{escape(saved)}</span>.
|
||||
</div>
|
||||
"""
|
||||
if saved
|
||||
else ""
|
||||
)
|
||||
body = f"""
|
||||
<h1>Self-Scoping Comparison</h1>
|
||||
{saved_notice}
|
||||
<section class="panel stack">
|
||||
<div class="actions">
|
||||
<a class="button secondary" href="/ui/self-scoping">Back</a>
|
||||
</div>
|
||||
<div class="notice {_comparison_notice_class(comparison)}">
|
||||
<strong>{escape(comparison_status)}</strong>
|
||||
<p>{escape(comparison_summary)}</p>
|
||||
</div>
|
||||
<div class="review-meta">
|
||||
<span class="pill">Matched {matched_count}</span>
|
||||
<span class="pill">Missing {missing_count}</span>
|
||||
<span class="pill">Forbidden {forbidden_count}</span>
|
||||
<span class="pill">Misplaced {misplaced_count}</span>
|
||||
</div>
|
||||
</section>
|
||||
<div class="review-grid">
|
||||
<section class="panel">
|
||||
<h2>Golden Profile</h2>
|
||||
{_render_golden_tree(golden_profile, comparison)}
|
||||
</section>
|
||||
<section class="panel">
|
||||
<h2>Assessment Output</h2>
|
||||
{_render_assessment_tree(assessment_artifact, comparison)}
|
||||
</section>
|
||||
</div>
|
||||
<section class="panel stack">
|
||||
<h2>Record Review Outcome</h2>
|
||||
<form class="stack" method="post" action="/ui/self-scoping/review">
|
||||
<input type="hidden" name="golden_path" value="{escape(golden)}">
|
||||
<input type="hidden" name="assessment_path" value="{escape(assessment)}">
|
||||
<input type="hidden" name="comparison_status" value="{escape(comparison_status)}">
|
||||
<label>Decision
|
||||
<select name="outcome" required>
|
||||
{_review_outcome_options()}
|
||||
</select>
|
||||
</label>
|
||||
<label>Reviewer <input name="reviewer" value="codex"></label>
|
||||
<label>Notes <textarea name="notes" rows="4"></textarea></label>
|
||||
<div class="actions">
|
||||
<button type="submit">Save outcome</button>
|
||||
<span data-pending>Saving outcome...</span>
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
"""
|
||||
return page("Self-Scoping Comparison", body)
|
||||
|
||||
|
||||
@router.post("/ui/self-scoping/review")
def save_self_scoping_review(
    golden_path: str = Form(...),
    assessment_path: str = Form(...),
    outcome: str = Form(...),
    reviewer: str = Form("codex"),
    notes: str = Form(""),
    comparison_status: str = Form(""),
):
    """Save a golden-vs-assessment decision, then redirect back to the comparison.

    On success the browser is sent (303) to the comparison page with the new
    outcome ID in the ``saved`` query parameter. Invalid input re-renders the
    index page with the error message and status 400.
    """
    submission = {
        "golden_path": golden_path,
        "assessment_path": assessment_path,
        "outcome": outcome,
        "reviewer": reviewer,
        "notes": notes,
        "comparison_status": comparison_status,
    }
    try:
        record = record_assessment_outcome(**submission)
    except (FileNotFoundError, ValueError, json.JSONDecodeError) as exc:
        return render_self_scoping_index(error_message=str(exc), status_code=400)

    # Post/redirect/get: a 303 makes the browser re-fetch the page with GET.
    target = (
        "/ui/self-scoping/review"
        f"?golden={quote_plus(golden_path)}"
        f"&assessment={quote_plus(assessment_path)}"
        f"&saved={quote_plus(record['outcome_id'])}"
    )
    return RedirectResponse(target, status_code=303)
|
||||
|
||||
|
||||
@router.get("/ui/self-scoping/run-review")
|
||||
def self_scoping_run_review(
|
||||
baseline: str = Query(...),
|
||||
challenger: str = Query(...),
|
||||
saved: str | None = Query(default=None),
|
||||
) -> HTMLResponse:
|
||||
try:
|
||||
baseline_artifact = load_json_artifact(baseline)
|
||||
challenger_artifact = load_json_artifact(challenger)
|
||||
except (FileNotFoundError, ValueError, json.JSONDecodeError) as exc:
|
||||
return render_self_scoping_index(error_message=str(exc), status_code=400)
|
||||
|
||||
comparison = _assessment_tree_diff(baseline_artifact, challenger_artifact)
|
||||
comparison_status = comparison["status"]
|
||||
comparison_summary = comparison["summary"]
|
||||
shared_count = len(comparison["shared_capabilities"])
|
||||
baseline_only_count = len(comparison["baseline_only_capabilities"])
|
||||
challenger_only_count = len(comparison["challenger_only_capabilities"])
|
||||
moved_feature_count = len(comparison["moved_feature_names"])
|
||||
saved_notice = (
|
||||
f"""
|
||||
<div class="notice success">
|
||||
Saved assessment outcome <span class="source">{escape(saved)}</span>.
|
||||
</div>
|
||||
"""
|
||||
if saved
|
||||
else ""
|
||||
)
|
||||
body = f"""
|
||||
<h1>Assessment Run Comparison</h1>
|
||||
{saved_notice}
|
||||
<section class="panel stack">
|
||||
<div class="actions">
|
||||
<a class="button secondary" href="/ui/self-scoping">Back</a>
|
||||
</div>
|
||||
<div class="notice {_comparison_notice_class(comparison)}">
|
||||
<strong>{escape(comparison_status)}</strong>
|
||||
<p>{escape(comparison_summary)}</p>
|
||||
</div>
|
||||
<div class="review-meta">
|
||||
<span class="pill">Shared {shared_count}</span>
|
||||
<span class="pill">Baseline only {baseline_only_count}</span>
|
||||
<span class="pill">Challenger only {challenger_only_count}</span>
|
||||
<span class="pill">Moved features {moved_feature_count}</span>
|
||||
</div>
|
||||
</section>
|
||||
<div class="review-grid">
|
||||
<section class="panel">
|
||||
<h2>Baseline Run</h2>
|
||||
{_render_assessment_tree_for_run_diff(baseline_artifact, comparison, role="baseline")}
|
||||
</section>
|
||||
<section class="panel">
|
||||
<h2>Challenger Run</h2>
|
||||
{_render_assessment_tree_for_run_diff(challenger_artifact, comparison, role="challenger")}
|
||||
</section>
|
||||
</div>
|
||||
<section class="panel stack">
|
||||
<h2>Record Review Outcome</h2>
|
||||
<form class="stack" method="post" action="/ui/self-scoping/run-review">
|
||||
<input type="hidden" name="baseline_path" value="{escape(baseline)}">
|
||||
<input type="hidden" name="challenger_path" value="{escape(challenger)}">
|
||||
<input type="hidden" name="comparison_status" value="{escape(comparison_status)}">
|
||||
<label>Decision
|
||||
<select name="outcome" required>
|
||||
{_pair_review_outcome_options()}
|
||||
</select>
|
||||
</label>
|
||||
<label>Reviewer <input name="reviewer" value="codex"></label>
|
||||
<label>Notes <textarea name="notes" rows="4"></textarea></label>
|
||||
<div class="actions">
|
||||
<button type="submit">Save outcome</button>
|
||||
<span data-pending>Saving outcome...</span>
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
"""
|
||||
return page("Assessment Run Comparison", body)
|
||||
|
||||
|
||||
@router.post("/ui/self-scoping/run-review")
def save_self_scoping_run_review(
    baseline_path: str = Form(...),
    challenger_path: str = Form(...),
    outcome: str = Form(...),
    reviewer: str = Form("codex"),
    notes: str = Form(""),
    comparison_status: str = Form(""),
):
    """Save a baseline-vs-challenger decision, then redirect back to the run comparison.

    On success the browser is sent (303) to the run-comparison page with the
    new outcome ID in the ``saved`` query parameter. Invalid input re-renders
    the index page with the error message and status 400.
    """
    submission = {
        "baseline_path": baseline_path,
        "challenger_path": challenger_path,
        "outcome": outcome,
        "reviewer": reviewer,
        "notes": notes,
        "comparison_status": comparison_status,
    }
    try:
        record = record_assessment_pair_outcome(**submission)
    except (FileNotFoundError, ValueError, json.JSONDecodeError) as exc:
        return render_self_scoping_index(error_message=str(exc), status_code=400)

    # Post/redirect/get: a 303 makes the browser re-fetch the page with GET.
    target = (
        "/ui/self-scoping/run-review"
        f"?baseline={quote_plus(baseline_path)}"
        f"&challenger={quote_plus(challenger_path)}"
        f"&saved={quote_plus(record['outcome_id'])}"
    )
    return RedirectResponse(target, status_code=303)
|
||||
|
||||
|
||||
def _assessment_tree_diff(baseline: dict, challenger: dict) -> dict:
    """Compare two assessment artifacts by capability names and feature placement.

    The result lists capabilities unique to each run, capabilities present in
    both, and feature names present in both runs whose owning capabilities
    differ. Those moved features are also given as (capability, feature) pairs
    per side. ``status`` is ``candidate_improvement`` when nothing differs and
    ``needs_review`` otherwise.
    """
    names_in_baseline = set(_assessment_capability_names(baseline))
    names_in_challenger = set(_assessment_capability_names(challenger))
    only_baseline = sorted(names_in_baseline - names_in_challenger)
    only_challenger = sorted(names_in_challenger - names_in_baseline)
    in_both = sorted(names_in_baseline & names_in_challenger)

    owners_in_baseline = _assessment_feature_index(baseline)
    owners_in_challenger = _assessment_feature_index(challenger)
    common_features = set(owners_in_baseline) & set(owners_in_challenger)
    moved = sorted(
        feature
        for feature in common_features
        if owners_in_baseline[feature] != owners_in_challenger[feature]
    )

    def _moved_pairs(owners: dict) -> set:
        # Expand each moved feature into one pair per capability that owns it.
        return {
            (capability, feature)
            for feature in moved
            for capability in owners[feature]
        }

    if only_baseline or only_challenger or moved:
        status = "needs_review"
    else:
        status = "candidate_improvement"

    return {
        "status": status,
        "summary": _assessment_tree_diff_summary(only_baseline, only_challenger, moved),
        "baseline_only_capabilities": only_baseline,
        "challenger_only_capabilities": only_challenger,
        "shared_capabilities": in_both,
        "moved_feature_names": moved,
        "baseline_moved_feature_pairs": _moved_pairs(owners_in_baseline),
        "challenger_moved_feature_pairs": _moved_pairs(owners_in_challenger),
    }
|
||||
|
||||
|
||||
def _assessment_tree_diff_summary(
|
||||
baseline_only: list[str],
|
||||
challenger_only: list[str],
|
||||
moved_feature_names: list[str],
|
||||
) -> str:
|
||||
if not baseline_only and not challenger_only and not moved_feature_names:
|
||||
return "Assessment hierarchy names match between baseline and challenger."
|
||||
return (
|
||||
"Assessment runs differ: "
|
||||
f"{len(baseline_only)} baseline-only capability(s), "
|
||||
f"{len(challenger_only)} challenger-only capability(s), and "
|
||||
f"{len(moved_feature_names)} moved feature name(s)."
|
||||
)
|
||||
|
||||
|
||||
def _assessment_capability_names(assessment: dict) -> list[str]:
|
||||
names: list[str] = []
|
||||
for ability in assessment.get("generated_tree", {}).get("abilities", []):
|
||||
for capability in ability.get("capabilities", []):
|
||||
name = capability.get("name")
|
||||
if name:
|
||||
names.append(name)
|
||||
return names
|
||||
|
||||
|
||||
def _assessment_feature_index(assessment: dict) -> dict[str, set[str]]:
|
||||
index: dict[str, set[str]] = {}
|
||||
for ability in assessment.get("generated_tree", {}).get("abilities", []):
|
||||
for capability in ability.get("capabilities", []):
|
||||
capability_name = capability.get("name", "")
|
||||
for feature in capability.get("features", []):
|
||||
feature_name = feature.get("name")
|
||||
if feature_name:
|
||||
index.setdefault(feature_name, set()).add(capability_name)
|
||||
return index
|
||||
|
||||
|
||||
def _render_assessment_tree_for_run_diff(
|
||||
assessment: dict,
|
||||
comparison: dict,
|
||||
*,
|
||||
role: str,
|
||||
) -> str:
|
||||
if role == "baseline":
|
||||
changed_capabilities = set(comparison["baseline_only_capabilities"])
|
||||
moved_pairs = comparison["baseline_moved_feature_pairs"]
|
||||
changed_reason = "Baseline only"
|
||||
else:
|
||||
changed_capabilities = set(comparison["challenger_only_capabilities"])
|
||||
moved_pairs = comparison["challenger_moved_feature_pairs"]
|
||||
changed_reason = "Challenger only"
|
||||
shared = set(comparison["shared_capabilities"])
|
||||
ability_blocks = []
|
||||
for ability in assessment.get("generated_tree", {}).get("abilities", []):
|
||||
capability_blocks = []
|
||||
for capability in ability.get("capabilities", []):
|
||||
name = capability.get("name", "")
|
||||
item_class = "warn" if name in changed_capabilities else "match" if name in shared else ""
|
||||
reason = changed_reason if name in changed_capabilities else "Shared capability"
|
||||
capability_blocks.append(
|
||||
f"""
|
||||
<article class="review-item {item_class}">
|
||||
<h3>{escape(name)}</h3>
|
||||
<div class="review-meta">
|
||||
<span class="pill">{escape(reason)}</span>
|
||||
<span class="pill">{escape(capability.get("primary_class", ""))}</span>
|
||||
</div>
|
||||
{_render_generated_features(name, capability.get("features", []), moved_pairs)}
|
||||
</article>
|
||||
"""
|
||||
)
|
||||
ability_blocks.append(
|
||||
f"""
|
||||
<section class="stack">
|
||||
<h3>{escape(ability.get("name", ""))}</h3>
|
||||
{"".join(capability_blocks) or '<p class="muted">No capabilities generated.</p>'}
|
||||
</section>
|
||||
"""
|
||||
)
|
||||
return "\n".join(ability_blocks) or '<p class="muted">No generated abilities found.</p>'
|
||||
|
||||
|
||||
def _review_artifact_options(artifacts) -> str:
|
||||
if not artifacts:
|
||||
return '<option value="">No artifacts found</option>'
|
||||
return "\n".join(
|
||||
f"""
|
||||
<option value="{escape(artifact.path)}">
|
||||
{escape(artifact.artifact_id)} · {escape(artifact.updated_at or artifact.title)}
|
||||
</option>
|
||||
"""
|
||||
for artifact in artifacts
|
||||
)
|
||||
|
||||
|
||||
def _render_outcome_table(outcomes: list[dict]) -> str:
|
||||
if not outcomes:
|
||||
return '<p class="muted">No review outcomes have been recorded yet.</p>'
|
||||
rows = "\n".join(
|
||||
f"""
|
||||
<tr>
|
||||
<td>{escape(record.get("created_at", ""))}</td>
|
||||
<td><span class="pill">{escape(record.get("outcome", ""))}</span></td>
|
||||
<td>{escape(record.get("comparison_status", ""))}</td>
|
||||
<td class="source">{escape(_outcome_record_subject(record))}</td>
|
||||
</tr>
|
||||
"""
|
||||
for record in outcomes[:8]
|
||||
)
|
||||
return f"""
|
||||
<table>
|
||||
<thead><tr><th>Created</th><th>Outcome</th><th>Status</th><th>Assessment</th></tr></thead>
|
||||
<tbody>{rows}</tbody>
|
||||
</table>
|
||||
"""
|
||||
|
||||
|
||||
def _outcome_record_subject(record: dict) -> str:
|
||||
if record.get("assessment_artifact_id"):
|
||||
return str(record["assessment_artifact_id"])
|
||||
if record.get("challenger_assessment_artifact_id"):
|
||||
return str(record["challenger_assessment_artifact_id"])
|
||||
if record.get("outcome_id"):
|
||||
return str(record["outcome_id"])
|
||||
return ""
|
||||
|
||||
|
||||
def _comparison_notice_class(comparison: dict) -> str:
|
||||
if comparison["status"] == "regression":
|
||||
return "error"
|
||||
if comparison["status"] == "needs_review":
|
||||
return "warn"
|
||||
return "success"
|
||||
|
||||
|
||||
def _render_golden_tree(golden_profile: dict, comparison: dict) -> str:
|
||||
missing = set(comparison["missing_expected_capabilities"])
|
||||
matched = set(comparison["matched_expected_capabilities"])
|
||||
capabilities = golden_profile.get("ability", {}).get("expected_capabilities", [])
|
||||
items = []
|
||||
for capability in capabilities:
|
||||
name = capability.get("name", "")
|
||||
item_class = "problem" if name in missing else "match" if name in matched else ""
|
||||
features = _render_expected_features(capability.get("expected_features", []))
|
||||
state = "Missing" if name in missing else "Matched" if name in matched else "Expected"
|
||||
items.append(
|
||||
f"""
|
||||
<article class="review-item {item_class}">
|
||||
<h3>{escape(name)}</h3>
|
||||
<div class="review-meta">
|
||||
<span class="pill">{escape(state)}</span>
|
||||
<span class="pill">{escape(capability.get("primary_class", ""))}</span>
|
||||
</div>
|
||||
{features}
|
||||
</article>
|
||||
"""
|
||||
)
|
||||
return "\n".join(items) or '<p class="muted">No expected capabilities found.</p>'
|
||||
|
||||
|
||||
def _render_expected_features(features: list[dict]) -> str:
|
||||
if not features:
|
||||
return ""
|
||||
rows = []
|
||||
for feature in features:
|
||||
sources = ", ".join(feature.get("source_paths", [])[:3])
|
||||
rows.append(
|
||||
f"""
|
||||
<li>
|
||||
{escape(feature.get("name", ""))}
|
||||
<span class="pill">{escape(feature.get("primary_class", ""))}</span>
|
||||
<div class="source">{escape(sources)}</div>
|
||||
</li>
|
||||
"""
|
||||
)
|
||||
return f'<ul class="review-list">{"".join(rows)}</ul>'
|
||||
|
||||
|
||||
def _render_assessment_tree(assessment: dict, comparison: dict) -> str:
|
||||
forbidden = set(comparison["forbidden_native_capabilities_present"])
|
||||
unexpected = set(comparison["unexpected_native_capabilities"])
|
||||
misplaced = {
|
||||
(item.get("capability", ""), item.get("feature", ""))
|
||||
for item in comparison["misplaced_features"]
|
||||
}
|
||||
abilities = assessment.get("generated_tree", {}).get("abilities", [])
|
||||
ability_blocks = []
|
||||
for ability in abilities:
|
||||
capabilities = ability.get("capabilities", [])
|
||||
capability_blocks = []
|
||||
for capability in capabilities:
|
||||
name = capability.get("name", "")
|
||||
item_class = "problem" if name in forbidden else "warn" if name in unexpected else ""
|
||||
reason = (
|
||||
"Forbidden native capability"
|
||||
if name in forbidden
|
||||
else "Unexpected native capability"
|
||||
if name in unexpected
|
||||
else "Generated capability"
|
||||
)
|
||||
capability_blocks.append(
|
||||
f"""
|
||||
<article class="review-item {item_class}">
|
||||
<h3>{escape(name)}</h3>
|
||||
<div class="review-meta">
|
||||
<span class="pill">{escape(reason)}</span>
|
||||
<span class="pill">{escape(capability.get("primary_class", ""))}</span>
|
||||
</div>
|
||||
{_render_generated_features(name, capability.get("features", []), misplaced)}
|
||||
</article>
|
||||
"""
|
||||
)
|
||||
ability_blocks.append(
|
||||
f"""
|
||||
<section class="stack">
|
||||
<h3>{escape(ability.get("name", ""))}</h3>
|
||||
{"".join(capability_blocks) or '<p class="muted">No capabilities generated.</p>'}
|
||||
</section>
|
||||
"""
|
||||
)
|
||||
return "\n".join(ability_blocks) or '<p class="muted">No generated abilities found.</p>'
|
||||
|
||||
|
||||
def _render_generated_features(
|
||||
capability_name: str,
|
||||
features: list[dict],
|
||||
misplaced: set[tuple[str, str]],
|
||||
) -> str:
|
||||
if not features:
|
||||
return ""
|
||||
rows = []
|
||||
for feature in features:
|
||||
feature_name = feature.get("name", "")
|
||||
feature_class = "warn" if (capability_name, feature_name) in misplaced else ""
|
||||
rows.append(
|
||||
f"""
|
||||
<li class="{feature_class}">
|
||||
{escape(feature_name)}
|
||||
<span class="pill">{escape(feature.get("type") or feature.get("primary_class") or "")}</span>
|
||||
<div class="source">{escape(feature.get("location", ""))}</div>
|
||||
</li>
|
||||
"""
|
||||
)
|
||||
return f'<ul class="review-list">{"".join(rows)}</ul>'
|
||||
|
||||
|
||||
def _review_outcome_options() -> str:
    """Render ``<option>`` elements for the golden-vs-assessment outcome select.

    Iterates ``REVIEW_OUTCOME_LABELS`` in its own order and emits one option
    per value that is also in ``ALLOWED_OUTCOMES``, using the mapped label as
    the visible text. Options are joined with newlines.
    """
    options = []
    for value in REVIEW_OUTCOME_LABELS:
        if value not in ALLOWED_OUTCOMES:
            continue
        label = REVIEW_OUTCOME_LABELS[value]
        options.append(f'<option value="{escape(value)}">{escape(label)}</option>')
    return "\n".join(options)
|
||||
|
||||
|
||||
def _pair_review_outcome_options() -> str:
    """Render ``<option>`` elements for the assessment-pair outcome select.

    Iterates ``PAIR_REVIEW_OUTCOME_LABELS`` in its own order and emits one
    option per value that is also in ``ALLOWED_OUTCOMES``, using the mapped
    label as the visible text. Options are joined with newlines.
    """
    options = []
    for value in PAIR_REVIEW_OUTCOME_LABELS:
        if value not in ALLOWED_OUTCOMES:
            continue
        label = PAIR_REVIEW_OUTCOME_LABELS[value]
        options.append(f'<option value="{escape(value)}">{escape(label)}</option>')
    return "\n".join(options)
|
||||
|
||||
|
||||
@router.get("/ui/repos/{repository_id}/scope")
|
||||
def repository_scope_document(
|
||||
repository_id: int,
|
||||
|
||||
105
tests/test_self_scoping_review_store.py
Normal file
105
tests/test_self_scoping_review_store.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from repo_registry.self_scoping.review_store import (
|
||||
list_assessment_artifacts,
|
||||
list_golden_profiles,
|
||||
load_json_artifact,
|
||||
record_assessment_outcome,
|
||||
record_assessment_pair_outcome,
|
||||
)
|
||||
|
||||
|
||||
def write_json(path, payload):
    """Serialize ``payload`` as JSON to ``path``, creating parent directories.

    Missing parent directories are created first; the file is written as
    UTF-8 text.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload)
    path.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def test_review_store_lists_and_loads_artifacts(tmp_path):
    """Seeded golden and assessment files are listed and loadable by relative path."""
    root = tmp_path / "self-scoping"
    golden_payload = {
        "profile_id": "repo-scoping-golden-v1",
        "title": "Golden",
        "updated_at": "2026-05-15",
    }
    assessment_payload = {
        "artifact_id": "known-bad-run",
        "created_at": "2026-05-15T10:00:00Z",
    }
    write_json(root / "golden" / "profile.json", golden_payload)
    write_json(root / "assessments" / "run.json", assessment_payload)

    first_golden = list_golden_profiles(root)[0]
    assert first_golden.path == "golden/profile.json"

    first_assessment = list_assessment_artifacts(root)[0]
    assert first_assessment.artifact_id == "known-bad-run"

    loaded = load_json_artifact("assessments/run.json", root)
    assert loaded["artifact_id"] == "known-bad-run"
|
||||
|
||||
|
||||
def test_review_store_rejects_paths_outside_root(tmp_path):
    """Loading a path that climbs out of the store root raises ``ValueError``."""
    root = tmp_path / "self-scoping"
    profile_file = root / "golden" / "profile.json"
    write_json(profile_file, {"profile_id": "profile"})

    escaping_path = "../outside.json"
    with pytest.raises(ValueError):
        load_json_artifact(escaping_path, root)
|
||||
|
||||
|
||||
def test_record_assessment_outcome_is_append_only_json(tmp_path):
    """Recording an outcome writes exactly one JSON file matching the returned record."""
    root = tmp_path / "self-scoping"
    write_json(root / "golden" / "profile.json", {"profile_id": "profile-v1"})
    assessment_payload = {
        "artifact_id": "run-1",
        "engine_identity": {"engine_commit": "abc123", "engine_release": "v1"},
    }
    write_json(root / "assessments" / "run.json", assessment_payload)

    returned = record_assessment_outcome(
        golden_path="golden/profile.json",
        assessment_path="assessments/run.json",
        outcome="prefer_golden",
        reviewer="codex",
        notes="Known provider-routing regression remains present.",
        comparison_status="regression",
        root=root,
    )

    written = list((root / "outcomes").glob("*.json"))
    assert len(written) == 1

    on_disk = json.loads(written[0].read_text(encoding="utf-8"))
    assert on_disk == returned
    # The persisted record binds the decision to the profile, artifact and engine.
    assert on_disk["schema_version"] == "self-scoping-review-outcome/v1"
    assert on_disk["golden_profile_id"] == "profile-v1"
    assert on_disk["assessment_artifact_id"] == "run-1"
    assert on_disk["engine_identity"]["engine_commit"] == "abc123"
|
||||
|
||||
|
||||
def test_record_assessment_pair_outcome_keeps_both_release_bindings(tmp_path):
    """A pair outcome records artifact ids and engine identities for both runs."""
    root = tmp_path / "self-scoping"
    assessments_dir = root / "assessments"
    baseline_payload = {
        "artifact_id": "baseline-run",
        "engine_identity": {"engine_commit": "old"},
    }
    challenger_payload = {
        "artifact_id": "challenger-run",
        "engine_identity": {"engine_commit": "new"},
    }
    write_json(assessments_dir / "baseline.json", baseline_payload)
    write_json(assessments_dir / "challenger.json", challenger_payload)

    result = record_assessment_pair_outcome(
        baseline_path="assessments/baseline.json",
        challenger_path="assessments/challenger.json",
        outcome="prefer_challenger",
        reviewer="codex",
        notes="Hierarchy is closer to native repo-scoping utility.",
        comparison_status="needs_review",
        root=root,
    )

    assert result["decision_scope"] == "assessment-pair-comparison"
    assert result["baseline_assessment_artifact_id"] == "baseline-run"
    assert result["challenger_assessment_artifact_id"] == "challenger-run"
    assert result["baseline_engine_identity"]["engine_commit"] == "old"
    assert result["challenger_engine_identity"]["engine_commit"] == "new"
|
||||
158
tests/test_self_scoping_web_ui.py
Normal file
158
tests/test_self_scoping_web_ui.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import json
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from repo_registry.web_api.app import app
|
||||
|
||||
|
||||
def write_json(path, payload):
    """Write ``payload`` to ``path`` as UTF-8 JSON, creating parent directories."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    text = json.dumps(payload)
    path.write_text(text, encoding="utf-8")
|
||||
|
||||
|
||||
def seed_review_artifacts(root):
    """Write one golden profile and two assessment artifacts under ``root``.

    The golden profile expects "Scan Repositories Into Observed Facts" and
    forbids "Route LLM Requests Across Providers". ``assessments/baseline.json``
    generates the expected capability, while ``assessments/run.json`` generates
    the forbidden one.
    """

    def _assessment(artifact_id, engine_commit, ability_name, capability_name, primary_class):
        # Both seeded runs share this shape and the same single API feature.
        return {
            "artifact_id": artifact_id,
            "target_repository": {"repo_slug": "repo-scoping"},
            "engine_identity": {"engine_commit": engine_commit},
            "generated_tree": {
                "abilities": [
                    {
                        "name": ability_name,
                        "capabilities": [
                            {
                                "name": capability_name,
                                "primary_class": primary_class,
                                "features": [
                                    {
                                        "name": "HTTP API surface",
                                        "type": "API",
                                        "location": "src/repo_registry/web_api/app.py",
                                    }
                                ],
                            }
                        ],
                    }
                ]
            },
            "known_regression_patterns": [],
        }

    golden_profile = {
        "profile_id": "profile-v1",
        "ability": {
            "expected_capabilities": [
                {
                    "name": "Scan Repositories Into Observed Facts",
                    "primary_class": "analysis",
                    "expected_features": [],
                }
            ]
        },
        "forbidden_native_capabilities": [
            {"name": "Route LLM Requests Across Providers"}
        ],
    }
    baseline_run = _assessment(
        "baseline-run",
        "old",
        "Map Repositories Into Reviewable Scope Profiles",
        "Scan Repositories Into Observed Facts",
        "analysis",
    )
    challenger_run = _assessment(
        "run-1",
        "abc123",
        "Support Repo Registry",
        "Route LLM Requests Across Providers",
        "llm-integration",
    )

    write_json(root / "golden" / "profile.json", golden_profile)
    write_json(root / "assessments" / "baseline.json", baseline_run)
    write_json(root / "assessments" / "run.json", challenger_run)
|
||||
|
||||
|
||||
def test_self_scoping_ui_compares_and_records_outcome(tmp_path, monkeypatch):
    """The review page renders a golden comparison and a POST stores an outcome file."""
    store_root = tmp_path / "self-scoping"
    seed_review_artifacts(store_root)
    monkeypatch.setenv("REPO_REGISTRY_SELF_SCOPING_ROOT", str(store_root))
    http = TestClient(app)

    landing = http.get("/ui/self-scoping")
    assert landing.status_code == 200
    assert "Self-Scoping Review" in landing.text

    query = {"golden": "golden/profile.json", "assessment": "assessments/run.json"}
    comparison_page = http.get("/ui/self-scoping/review", params=query)
    assert comparison_page.status_code == 200
    assert "Route LLM Requests Across Providers" in comparison_page.text
    assert "regression" in comparison_page.text

    form = {
        "golden_path": "golden/profile.json",
        "assessment_path": "assessments/run.json",
        "outcome": "prefer_golden",
        "reviewer": "codex",
        "notes": "Provider routing is not native scope.",
        "comparison_status": "regression",
    }
    # Redirects are not followed so the 303 response itself can be asserted.
    submission = http.post("/ui/self-scoping/review", data=form, follow_redirects=False)
    assert submission.status_code == 303
    assert list((store_root / "outcomes").glob("*.json"))
|
||||
|
||||
|
||||
def test_self_scoping_ui_compares_two_assessment_runs(tmp_path, monkeypatch):
    """The run-review page compares two runs and a POST stores a pair-scoped outcome."""
    store_root = tmp_path / "self-scoping"
    seed_review_artifacts(store_root)
    monkeypatch.setenv("REPO_REGISTRY_SELF_SCOPING_ROOT", str(store_root))
    http = TestClient(app)

    query = {
        "baseline": "assessments/baseline.json",
        "challenger": "assessments/run.json",
    }
    comparison_page = http.get("/ui/self-scoping/run-review", params=query)
    assert comparison_page.status_code == 200
    assert "Assessment Run Comparison" in comparison_page.text
    assert "Baseline only" in comparison_page.text
    assert "Challenger only" in comparison_page.text

    form = {
        "baseline_path": "assessments/baseline.json",
        "challenger_path": "assessments/run.json",
        "outcome": "prefer_baseline",
        "reviewer": "codex",
        "notes": "Baseline preserves native scanning capability.",
        "comparison_status": "needs_review",
    }
    # Redirects are not followed so the 303 response itself can be asserted.
    submission = http.post("/ui/self-scoping/run-review", data=form, follow_redirects=False)
    assert submission.status_code == 303

    stored = []
    for outcome_file in (store_root / "outcomes").glob("*.json"):
        stored.append(json.loads(outcome_file.read_text()))
    assert any(entry["decision_scope"] == "assessment-pair-comparison" for entry in stored)
|
||||
@@ -4,7 +4,7 @@ type: workplan
|
||||
title: "Self-Scoping Baseline Evaluation"
|
||||
domain: capabilities
|
||||
repo: repo-scoping
|
||||
status: active
|
||||
status: done
|
||||
owner: codex
|
||||
topic_slug: foerster-capabilities
|
||||
created: "2026-05-15"
|
||||
@@ -208,7 +208,7 @@ capabilities. Reports can be emitted as JSON or Markdown.
|
||||
|
||||
```task
|
||||
id: RREG-WP-0013-T06
|
||||
status: todo
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "16a60b7c-7e2c-4bb0-b4ab-2381289dba0b"
|
||||
```
|
||||
@@ -223,6 +223,12 @@ Acceptance criteria:
|
||||
- Review decisions are persisted as assessment outcomes, not as changes to the
|
||||
underlying historical artifacts.
|
||||
|
||||
Implementation note 2026-05-15: added a file-backed `/ui/self-scoping` curator
|
||||
surface that reads golden profiles and assessment artifacts from
|
||||
`docs/self-scoping`, renders side-by-side hierarchy comparisons with regression
|
||||
highlights, compares two assessment runs directly for old-vs-new judgement, and
|
||||
records append-only review outcome JSON under `docs/self-scoping/outcomes/`.
|
||||
|
||||
## T07: Add Self-Scoping Regression Command
|
||||
|
||||
```task
|
||||
|
||||
Reference in New Issue
Block a user