Add self-scoping assessment export command

This commit is contained in:
2026-05-15 12:39:51 +02:00
parent bc08977f85
commit 2796fc5816
10 changed files with 934 additions and 19 deletions

View File

@@ -15,7 +15,11 @@
"execution",
"assessment",
"fact_summary",
"content_chunk_summary",
"generated_tree",
"approved_map",
"review_decisions",
"quality_gate_outcomes",
"known_regression_patterns"
],
"properties": {
@@ -48,9 +52,27 @@
"fact_summary": {
"$ref": "#/$defs/factSummary"
},
"content_chunk_summary": {
"$ref": "#/$defs/contentChunkSummary"
},
"generated_tree": {
"$ref": "#/$defs/generatedTree"
},
"approved_map": {
"$ref": "#/$defs/approvedMap"
},
"review_decisions": {
"type": "array",
"items": {
"$ref": "#/$defs/reviewDecision"
}
},
"quality_gate_outcomes": {
"type": "array",
"items": {
"$ref": "#/$defs/qualityGateOutcome"
}
},
"known_regression_patterns": {
"type": "array",
"items": {
@@ -255,6 +277,37 @@
}
}
},
"contentChunkSummary": {
"type": "object",
"additionalProperties": false,
"required": ["total", "counts_by_kind", "counts_by_source_role", "paths"],
"properties": {
"total": {
"type": "integer",
"minimum": 0
},
"counts_by_kind": {
"type": "object",
"additionalProperties": {
"type": "integer",
"minimum": 0
}
},
"counts_by_source_role": {
"type": "object",
"additionalProperties": {
"type": "integer",
"minimum": 0
}
},
"paths": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"generatedTree": {
"type": "object",
"additionalProperties": false,
@@ -271,7 +324,7 @@
"ability": {
"type": "object",
"additionalProperties": false,
"required": ["name", "status", "primary_class", "capabilities"],
"required": ["name", "status", "primary_class", "source_refs", "capabilities"],
"properties": {
"name": {
"type": "string"
@@ -282,6 +335,12 @@
"primary_class": {
"type": "string"
},
"source_refs": {
"type": "array",
"items": {
"$ref": "#/$defs/sourceRef"
}
},
"capabilities": {
"type": "array",
"items": {
@@ -293,7 +352,14 @@
"capability": {
"type": "object",
"additionalProperties": false,
"required": ["name", "status", "primary_class", "features"],
"required": [
"name",
"status",
"primary_class",
"source_refs",
"features",
"evidence"
],
"properties": {
"name": {
"type": "string"
@@ -304,18 +370,37 @@
"primary_class": {
"type": "string"
},
"source_refs": {
"type": "array",
"items": {
"$ref": "#/$defs/sourceRef"
}
},
"features": {
"type": "array",
"items": {
"$ref": "#/$defs/feature"
}
},
"evidence": {
"type": "array",
"items": {
"$ref": "#/$defs/candidateEvidence"
}
}
}
},
"feature": {
"type": "object",
"additionalProperties": false,
"required": ["name", "type", "status", "primary_class", "location"],
"required": [
"name",
"type",
"status",
"primary_class",
"location",
"source_refs"
],
"properties": {
"name": {
"type": "string"
@@ -331,9 +416,104 @@
},
"location": {
"type": "string"
},
"source_refs": {
"type": "array",
"items": {
"$ref": "#/$defs/sourceRef"
}
}
}
},
"candidateEvidence": {
"type": "object",
"additionalProperties": false,
"required": ["type", "reference", "strength", "status", "source_refs"],
"properties": {
"type": {
"type": "string"
},
"reference": {
"type": "string"
},
"strength": {
"type": "string"
},
"status": {
"type": "string"
},
"source_refs": {
"type": "array",
"items": {
"$ref": "#/$defs/sourceRef"
}
}
}
},
"sourceRef": {
"type": "object",
"additionalProperties": false,
"required": ["fact_id", "path", "kind", "name", "line"],
"properties": {
"fact_id": {
"type": ["integer", "null"]
},
"path": {
"type": "string"
},
"kind": {
"type": "string"
},
"name": {
"type": "string"
},
"line": {
"type": ["integer", "null"]
}
}
},
"approvedMap": {
"type": "object",
"description": "Current approved ability map at export time.",
"additionalProperties": true
},
"reviewDecision": {
"type": "object",
"additionalProperties": false,
"required": [
"id",
"repository_id",
"analysis_run_id",
"action",
"notes",
"created_at"
],
"properties": {
"id": {
"type": "integer"
},
"repository_id": {
"type": "integer"
},
"analysis_run_id": {
"type": ["integer", "null"]
},
"action": {
"type": "string"
},
"notes": {
"type": "string"
},
"created_at": {
"type": "string"
}
}
},
"qualityGateOutcome": {
"type": "object",
"description": "Versioned deterministic quality-gate outcome. Empty until RREG-WP-0014 introduces gates.",
"additionalProperties": true
},
"regressionPattern": {
"type": "object",
"additionalProperties": false,
@@ -411,23 +591,35 @@
}
]
},
"content_chunk_summary": {
"total": 0,
"counts_by_kind": {},
"counts_by_source_role": {},
"paths": []
},
"generated_tree": {
"abilities": [
{
"name": "Support Repo Registry",
"status": "approved",
"primary_class": "repository-intelligence",
"source_refs": [],
"capabilities": [
{
"name": "Route LLM Requests Across Providers",
"status": "approved",
"primary_class": "llm-integration",
"features": []
"source_refs": [],
"features": [],
"evidence": []
}
]
}
]
},
"approved_map": {},
"review_decisions": [],
"quality_gate_outcomes": [],
"known_regression_patterns": [
{
"id": "RREG-SELF-REG-001",

View File

@@ -33,3 +33,18 @@ assessment. Compare the challenger to the golden profile and to the negative
seed. Reviewers should be able to choose whether the old result, new result, or
neither is better, then store that judgement as a new assessment outcome.
## Export Command
Export a completed analysis run as a challenger artifact:
```bash
repo-scoping export-assessment \
--repo repo-scoping \
--analysis-run 39 \
--output docs/self-scoping/assessments/repo-scoping-challenger-run-39.json
```
The command reads an existing registry database and does not clone or scan the
target repository. It records the target analysis metadata, candidate graph,
approved map at export time, review decisions, fact and content summaries, known
regression patterns, and current repo-scoping engine identity.

View File

@@ -91,94 +91,125 @@
}
]
},
"content_chunk_summary": {
"total": 0,
"counts_by_kind": {},
"counts_by_source_role": {},
"paths": []
},
"generated_tree": {
"abilities": [
{
"name": "Support Repo Registry",
"status": "approved",
"primary_class": "repository-intelligence",
"source_refs": [],
"capabilities": [
{
"name": "Route LLM Requests Across Providers",
"status": "approved",
"primary_class": "llm-integration",
"source_refs": [],
"features": [
{
"name": "Use Anthropic Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
},
{
"name": "Use Claude Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
},
{
"name": "Use Gemini Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
},
{
"name": "Use OpenAI Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
},
{
"name": "Use OpenRouter Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
},
{
"name": "Configure LLM Provider Credentials",
"type": "configuration",
"status": "approved",
"primary_class": "configuration",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
},
{
"name": "Maintain LLM Provider Registry",
"type": "backend",
"status": "approved",
"primary_class": "backend",
"location": "src/repo_registry/repo_scanning/scanner.py"
"location": "src/repo_registry/repo_scanning/scanner.py",
"source_refs": []
},
{
"name": "Apply LLM Provider Fallback Policy",
"type": "backend",
"status": "approved",
"primary_class": "backend",
"location": "src/repo_registry/repo_scanning/scanner.py"
"location": "src/repo_registry/repo_scanning/scanner.py",
"source_refs": []
},
{
"name": "HTTP API surface: possible API surface, GET /health, @app.post(, +43 more",
"type": "API",
"status": "approved",
"primary_class": "API",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
},
{
"name": "CLI command surface: CLI command build_parser, CLI command make_service",
"type": "CLI",
"status": "approved",
"primary_class": "CLI",
"location": "multiple files"
"location": "multiple files",
"source_refs": []
}
]
],
"evidence": []
}
]
}
]
},
"approved_map": {},
"review_decisions": [
{
"id": 21,
"repository_id": 16,
"analysis_run_id": 39,
"action": "trusted_auto_approve_candidate_graph",
"notes": "Trusted auto-populate mode reviewed candidate graph after deterministic candidate generation. Auto-approved 1 safe candidate capability(s); left 0 for review. Approved: Route LLM Requests Across Providers: eligible LLM utility relationship with source support.",
"created_at": "2026-05-15 09:28:49"
}
],
"quality_gate_outcomes": [],
"known_regression_patterns": [
{
"id": "RREG-SELF-REG-001",

View File

@@ -8,6 +8,7 @@ from repo_registry.core.models import CharacteristicRebuildResult, Repository
from repo_registry.core.service import RegistryService
from repo_registry.llm_extraction import LLMCandidateExtractor, create_llm_connect_adapter
from repo_registry.repo_ingestion.git import GitIngestionService
from repo_registry.self_scoping.assessment import artifact_json, export_assessment_artifact
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
from repo_registry.web_api.app import Settings
@@ -44,6 +45,37 @@ def build_parser() -> argparse.ArgumentParser:
)
rebuild.add_argument("--database-path", help="Override REPO_REGISTRY_DATABASE_PATH.")
rebuild.add_argument("--checkout-root", help="Override REPO_REGISTRY_CHECKOUT_ROOT.")
export = subparsers.add_parser(
"export-assessment",
help="Export a completed analysis run as a self-scoping assessment artifact.",
)
export.add_argument("--repo", required=True, help="Repository id or exact repository name.")
export.add_argument("--analysis-run", type=int, required=True, help="Completed analysis run id.")
export.add_argument("--output", help="Write artifact JSON to this path instead of stdout.")
export.add_argument(
"--role",
choices=["baseline", "challenger", "negative_regression_seed"],
default="challenger",
help="Assessment artifact role.",
)
export.add_argument(
"--outcome",
choices=[
"baseline",
"challenger",
"preferred",
"tied",
"rejected",
"superseded",
"needs-human",
],
default="challenger",
help="Initial assessment outcome.",
)
export.add_argument("--reviewer", default="codex", help="Reviewer name recorded in the artifact.")
export.add_argument("--summary", help="Assessment summary override.")
export.add_argument("--database-path", help="Override REPO_REGISTRY_DATABASE_PATH.")
export.add_argument("--checkout-root", help="Override REPO_REGISTRY_CHECKOUT_ROOT.")
return parser
@@ -52,6 +84,8 @@ def main(argv: Sequence[str] | None = None) -> int:
args = parser.parse_args(argv)
if args.command == "rebuild-characteristics":
return rebuild_characteristics_command(args, parser)
if args.command == "export-assessment":
return export_assessment_command(args, parser)
parser.error(f"unknown command: {args.command}")
return 2
@@ -88,6 +122,38 @@ def rebuild_characteristics_command(
return 0
def export_assessment_command(
    args: argparse.Namespace,
    parser: argparse.ArgumentParser,
) -> int:
    """Run the ``export-assessment`` subcommand.

    Resolves exactly one repository from ``args``, exports the requested
    analysis run as an assessment artifact, and writes the JSON either to
    ``args.output`` or to stdout.

    Args:
        args: Parsed command-line arguments for ``export-assessment``.
        parser: Parser used to report usage errors; ``parser.error`` prints
            the message and exits the process with status 2.

    Returns:
        ``0`` when the artifact was written.
    """
    service = service_from_args(args)
    repositories = selected_repositories(service, args)
    if not repositories:
        parser.error("no repositories matched the requested target")
    if len(repositories) > 1:
        parser.error("assessment export requires exactly one repository")
    repository = repositories[0]
    try:
        artifact = export_assessment_artifact(
            service,
            repository.id,
            args.analysis_run,
            role=args.role,
            outcome=args.outcome,
            reviewer=args.reviewer,
            summary=args.summary,
        )
    except (NotFoundError, ValueError) as exc:
        # parser.error never returns, so ``artifact`` is always bound below.
        parser.error(str(exc))
    content = artifact_json(artifact)
    if args.output:
        output_path = Path(args.output)
        try:
            # The documented usage writes into a nested docs directory that may
            # not exist yet; create it instead of failing with a traceback.
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(content, encoding="utf-8")
        except OSError as exc:
            parser.error(f"cannot write assessment artifact to {output_path}: {exc}")
    else:
        print(content, end="")
    return 0
def service_from_args(args: argparse.Namespace) -> RegistryService:
settings = Settings()
database_path = Path(args.database_path or settings.database_path)
@@ -96,7 +162,8 @@ def service_from_args(args: argparse.Namespace) -> RegistryService:
store = RegistryStore(database_path)
store.initialize()
llm_extractor = None
if not args.no_llm and settings.llm_enabled and settings.llm_provider:
no_llm = getattr(args, "no_llm", True)
if not no_llm and settings.llm_enabled and settings.llm_provider:
adapter = create_llm_connect_adapter(settings.llm_provider, model=settings.llm_model)
llm_extractor = LLMCandidateExtractor(adapter)
return RegistryService(
@@ -111,7 +178,7 @@ def selected_repositories(
args: argparse.Namespace,
) -> list[Repository]:
repositories = service.list_repositories()
if args.all:
if getattr(args, "all", False):
return repositories
repo = str(args.repo)
if repo.isdigit():

View File

@@ -0,0 +1,3 @@
from repo_registry.self_scoping.assessment import export_assessment_artifact
__all__ = ["export_assessment_artifact"]

View File

@@ -0,0 +1,462 @@
from __future__ import annotations
import json
import subprocess
from collections import Counter
from dataclasses import asdict
from datetime import UTC, datetime
from importlib import metadata
from pathlib import Path
from typing import Any
from repo_registry.core.models import (
Ability,
CandidateAbility,
CandidateCapability,
CandidateEvidence,
CandidateFeature,
ContentChunk,
ObservedFact,
RepositoryAbilityMap,
ReviewDecision,
SourceReference,
)
from repo_registry.core.service import RegistryService
SCHEMA_VERSION = "self-scoping-assessment/v1"
KNOWN_PROVIDER_ROUTING_CAPABILITY = "Route LLM Requests Across Providers"
def export_assessment_artifact(
    service: RegistryService,
    repository_id: int,
    analysis_run_id: int,
    *,
    role: str = "challenger",
    outcome: str = "challenger",
    reviewer: str = "codex",
    summary: str | None = None,
    engine_root: str | Path | None = None,
) -> dict[str, Any]:
    """Assemble the self-scoping assessment artifact for a completed run.

    Args:
        service: Registry service used to load the repository, run, facts,
            content chunks, candidate graph, ability map and review decisions.
        repository_id: Id of the repository the run belongs to.
        analysis_run_id: Id of the analysis run to export.
        role: Assessment role recorded in the artifact and its id.
        outcome: Initial assessment outcome recorded in the artifact.
        reviewer: Reviewer name recorded in the artifact.
        summary: Optional summary; a generated one is used when falsy.
        engine_root: Directory inspected with git for engine identity;
            defaults to the current working directory.

    Returns:
        The artifact as a plain dictionary.

    Raises:
        ValueError: If the analysis run status is not ``"completed"``.
    """
    repository = service.get_repository(repository_id)
    analysis_run = service.get_analysis_run(repository_id, analysis_run_id)
    if analysis_run.status != "completed":
        raise ValueError(
            f"analysis run {analysis_run_id} is {analysis_run.status}, not completed"
        )

    snapshot = None
    if analysis_run.snapshot_id is not None:
        snapshot = service.store.get_snapshot(analysis_run.snapshot_id)

    facts = service.list_observed_facts(repository_id, analysis_run_id)
    chunks = service.list_content_chunks(repository_id, analysis_run_id)
    graph = service.candidate_graph(repository_id, analysis_run_id)
    ability_map = service.ability_map(repository_id)
    decisions = service.list_review_decisions(repository_id, analysis_run_id)

    engine_identity = _engine_identity(
        analysis_run.scanner_version,
        Path(engine_root or Path.cwd()),
    )
    regression_patterns = _known_regression_patterns(graph.abilities, decisions)
    comparison_eligibility = _comparison_eligibility(
        role,
        engine_identity["release_binding_status"],
    )
    artifact_summary = summary or _summary(role, regression_patterns)

    artifact_id = _artifact_id(repository.name, analysis_run_id, role)
    # Second-resolution UTC timestamp rendered with a trailing "Z".
    now_utc = datetime.now(UTC).replace(microsecond=0)
    created_at = now_utc.isoformat().replace("+00:00", "Z")

    # Without a snapshot, fall back to the repository record and placeholders.
    if snapshot is None:
        target_repository = {
            "repo_slug": _slug(repository.name),
            "repository_id": repository.id,
            "source": repository.url,
            "target_commit": "unknown",
            "target_branch": repository.branch,
            "dirty_state": "unknown",
            "file_count": None,
        }
    else:
        target_repository = {
            "repo_slug": _slug(repository.name),
            "repository_id": repository.id,
            "source": snapshot.source_path,
            "target_commit": snapshot.commit_hash,
            "target_branch": snapshot.branch,
            "dirty_state": _dirty_state(Path(snapshot.source_path)),
            "file_count": snapshot.file_count,
        }

    if comparison_eligibility == "not_comparable":
        eligibility_note = (
            "Artifact is not comparable as a preferred baseline until engine "
            "identity is complete."
        )
    else:
        eligibility_note = "Artifact has enough engine identity metadata for comparison."

    return {
        "schema_version": SCHEMA_VERSION,
        "artifact_id": artifact_id,
        "artifact_type": "assessment_run",
        "created_at": created_at,
        "target_repository": target_repository,
        "engine_identity": engine_identity,
        "execution": {
            "mode": _execution_mode(decisions),
            "analysis_run_id": analysis_run.id,
            "candidate_source": _candidate_source(decisions),
            "acceptance_mode": _acceptance_mode(decisions),
            "started_at": _timestamp(analysis_run.started_at),
            "completed_at": _timestamp(analysis_run.completed_at),
        },
        "assessment": {
            "role": role,
            "outcome": outcome,
            "summary": artifact_summary,
            "reviewer": reviewer,
            "comparison_eligibility": comparison_eligibility,
            "rationale": _rationale(regression_patterns, comparison_eligibility),
        },
        "fact_summary": _fact_summary(facts),
        "content_chunk_summary": _content_chunk_summary(chunks),
        "generated_tree": {
            "abilities": [_candidate_ability(item) for item in graph.abilities]
        },
        "approved_map": _approved_map(ability_map),
        "review_decisions": [_review_decision(item) for item in decisions],
        "quality_gate_outcomes": [],
        "known_regression_patterns": regression_patterns,
        "notes": [
            "Generated by repo-scoping self-scoping assessment exporter.",
            eligibility_note,
        ],
    }
def _engine_identity(scanner_version: str, engine_root: Path) -> dict[str, Any]:
    """Collect engine identity metadata from the git checkout at ``engine_root``.

    The binding status is ``"complete"`` whenever a HEAD commit could be read
    and ``"unbound"`` otherwise; the dirty state and release tag are recorded
    but do not influence that status.
    """
    engine_commit = _git_value(engine_root, "rev-parse", "HEAD")
    dirty_state = _dirty_state(engine_root)
    # Only set when HEAD sits exactly on a tag; otherwise git fails and we get None.
    release = _git_value(engine_root, "describe", "--tags", "--exact-match")

    if engine_commit:
        binding_status = "complete"
        binding_note = "Engine commit was captured from git."
    else:
        binding_status = "unbound"
        binding_note = "Engine commit could not be captured; artifact is not comparable."

    identity: dict[str, Any] = {
        "repo_scoping_version": _package_version(),
        "engine_commit": engine_commit,
        "engine_release": release,
        "engine_dirty_state": dirty_state,
        "scanner_version": scanner_version,
        "candidate_generator_version": "unversioned",
        "quality_criteria_version": "none",
        "prompt_version": None,
        "release_binding_status": binding_status,
        "release_binding_note": binding_note,
    }
    return identity
def _package_version() -> str:
try:
return metadata.version("repo-registry")
except metadata.PackageNotFoundError:
return "unknown"
def _git_value(root: Path, *args: str) -> str | None:
try:
result = subprocess.run(
["git", "-C", str(root), *args],
check=False,
capture_output=True,
text=True,
)
except OSError:
return None
value = result.stdout.strip()
return value if result.returncode == 0 and value else None
def _dirty_state(root: Path) -> str:
if not (root / ".git").exists():
return "unknown"
try:
result = subprocess.run(
["git", "-C", str(root), "status", "--short"],
check=False,
capture_output=True,
text=True,
)
except OSError:
return "unknown"
if result.returncode != 0:
return "unknown"
return "dirty" if result.stdout.strip() else "clean"
def _comparison_eligibility(role: str, release_binding_status: str) -> str:
if role == "negative_regression_seed":
return "eligible_as_negative_seed"
if release_binding_status == "complete":
return "eligible"
return "not_comparable"
def _summary(role: str, regression_patterns: list[dict[str, str]]) -> str:
if role == "negative_regression_seed":
return "Historical run captured as a negative self-scoping regression seed."
if regression_patterns:
return "Generated self-scoping assessment repeats known regression patterns."
return "Generated self-scoping assessment artifact for comparison."
def _rationale(
regression_patterns: list[dict[str, str]],
comparison_eligibility: str,
) -> list[str]:
rationale: list[str] = []
if comparison_eligibility == "not_comparable":
rationale.append("Engine identity is incomplete, so this cannot be a comparable baseline.")
for pattern in regression_patterns:
rationale.append(f"{pattern['id']}: {pattern['description']}")
return rationale
def _fact_summary(facts: list[ObservedFact]) -> dict[str, Any]:
    """Summarize observed facts: counts per kind plus contamination sources."""
    tally: Counter = Counter()
    for fact in facts:
        tally[fact.kind] += 1
    # Emit kinds in sorted order so the summary is stable across runs.
    counts_by_kind = {kind: tally[kind] for kind in sorted(tally)}
    return {
        "counts_by_kind": counts_by_kind,
        "contamination_sources": _contamination_sources(facts),
    }
def _contamination_sources(facts: list[ObservedFact]) -> list[dict[str, str]]:
provider_kinds = {
"llm_provider",
"credential_config",
"provider_registry",
"fallback_policy",
}
suspicious_segments = (
"test",
"tests/",
"fixtures",
"expectations",
"schemas.py",
"scanner.py",
"normalization.py",
"workplans/",
)
results: list[dict[str, str]] = []
seen: set[str] = set()
for fact in facts:
lower = fact.path.lower()
if fact.kind not in provider_kinds or not any(segment in lower for segment in suspicious_segments):
continue
if fact.path in seen:
continue
seen.add(fact.path)
results.append(
{
"path": fact.path,
"reason": (
"Provider-related fact came from scanner rules, tests, fixtures, "
"schemas, or workplan context and needs native-utility review."
),
}
)
return sorted(results, key=lambda item: item["path"])
def _content_chunk_summary(chunks: list[ContentChunk]) -> dict[str, Any]:
source_roles = Counter(
str(chunk.metadata.get("source_role", "") or "unknown") for chunk in chunks
)
return {
"total": len(chunks),
"counts_by_kind": dict(sorted(Counter(chunk.kind for chunk in chunks).items())),
"counts_by_source_role": dict(sorted(source_roles.items())),
"paths": sorted({chunk.path for chunk in chunks}),
}
def _candidate_ability(ability: CandidateAbility) -> dict[str, Any]:
return {
"name": ability.name,
"status": ability.status,
"primary_class": ability.primary_class,
"source_refs": [_source_ref(ref) for ref in ability.source_refs],
"capabilities": [
_candidate_capability(capability) for capability in ability.capabilities
],
}
def _candidate_capability(capability: CandidateCapability) -> dict[str, Any]:
return {
"name": capability.name,
"status": capability.status,
"primary_class": capability.primary_class,
"source_refs": [_source_ref(ref) for ref in capability.source_refs],
"features": [_candidate_feature(feature) for feature in capability.features],
"evidence": [_candidate_evidence(evidence) for evidence in capability.evidence],
}
def _candidate_feature(feature: CandidateFeature) -> dict[str, Any]:
return {
"name": feature.name,
"type": feature.type,
"status": feature.status,
"primary_class": feature.primary_class,
"location": feature.location,
"source_refs": [_source_ref(ref) for ref in feature.source_refs],
}
def _candidate_evidence(evidence: CandidateEvidence) -> dict[str, Any]:
return {
"type": evidence.type,
"reference": evidence.reference,
"strength": evidence.strength,
"status": evidence.status,
"source_refs": [_source_ref(ref) for ref in evidence.source_refs],
}
def _approved_map(ability_map: RepositoryAbilityMap) -> dict[str, Any]:
return {
"scope": asdict(ability_map.scope),
"abilities": [_approved_ability(ability) for ability in ability_map.abilities],
}
def _approved_ability(ability: Ability) -> dict[str, Any]:
return {
"name": ability.name,
"primary_class": ability.primary_class,
"capabilities": [
{
"name": capability.name,
"primary_class": capability.primary_class,
"features": [
{
"name": feature.name,
"type": feature.type,
"primary_class": feature.primary_class,
"location": feature.location,
"source_refs": [
_source_ref(ref) for ref in feature.source_refs
],
}
for feature in capability.features
],
"evidence": [asdict(evidence) for evidence in capability.evidence],
}
for capability in ability.capabilities
],
}
def _source_ref(ref: SourceReference) -> dict[str, Any]:
return asdict(ref)
def _review_decision(decision: ReviewDecision) -> dict[str, Any]:
    """Convert a review decision into a plain dictionary for the artifact.

    A string ``created_at`` is passed through ``_timestamp`` so it uses the
    same ISO-style form as the artifact's execution timestamps instead of the
    raw stored form (e.g. ``"2026-05-15 09:28:49"``).
    """
    payload = asdict(decision)
    created_at = payload.get("created_at")
    if isinstance(created_at, str):
        payload["created_at"] = _timestamp(created_at)
    return payload
def _known_regression_patterns(
abilities: list[CandidateAbility],
decisions: list[ReviewDecision],
) -> list[dict[str, str]]:
patterns: list[dict[str, str]] = []
llm_capabilities = [
capability
for ability in abilities
for capability in ability.capabilities
if capability.name == KNOWN_PROVIDER_ROUTING_CAPABILITY
]
if llm_capabilities:
patterns.append(
{
"id": "RREG-SELF-REG-001",
"title": "LLM provider vocabulary promoted as native capability",
"severity": "critical",
"description": (
"Generated tree contains Route LLM Requests Across Providers "
"as a repo-scoping capability."
),
"detection_hint": (
"Flag the provider-routing capability unless product intent "
"and public implementation explicitly support it."
),
}
)
if any(
feature.type in {"API", "CLI"}
for capability in llm_capabilities
for feature in capability.features
):
patterns.append(
{
"id": "RREG-SELF-REG-002",
"title": "Native API and CLI surfaces attached under false capability",
"severity": "high",
"description": (
"API or CLI surface features are nested below provider routing."
),
"detection_hint": (
"Flag API/CLI surface features whose parent capability is "
"llm-integration or provider-routing."
),
}
)
if any(decision.action == "trusted_auto_approve_candidate_graph" for decision in decisions):
patterns.append(
{
"id": "RREG-SELF-REG-003",
"title": "Deterministic trusted auto-approval accepted candidate truth",
"severity": "high",
"description": (
"Candidate characteristics were approved through trusted "
"auto-approval instead of human or agentic judgement."
),
"detection_hint": "Flag trusted_auto_approve_candidate_graph review decisions.",
}
)
return patterns
def _execution_mode(decisions: list[ReviewDecision]) -> str:
if any(decision.action == "trusted_auto_approve_candidate_graph" for decision in decisions):
return "trusted-auto-review"
if any(decision.action == "llm_extraction_used" for decision in decisions):
return "llm-assisted"
if any(decision.action.startswith("approve") for decision in decisions):
return "manual-review"
return "deterministic-only"
def _candidate_source(decisions: list[ReviewDecision]) -> str:
return "llm+deterministic" if any(
decision.action == "llm_extraction_used" for decision in decisions
) else "deterministic"
def _acceptance_mode(decisions: list[ReviewDecision]) -> str:
if any(decision.action == "trusted_auto_approve_candidate_graph" for decision in decisions):
return "trusted_auto_approve_candidate_graph"
if any(decision.action == "approve_candidate_graph" for decision in decisions):
return "manual_candidate_graph_approval"
if any(decision.action == "approve_analysis_run_changes" for decision in decisions):
return "manual_change_approval"
return "pending_review"
def _timestamp(value: str | None) -> str | None:
if value is None:
return None
if "T" in value:
return value
return value.replace(" ", "T") + "Z"
def _artifact_id(repository_name: str, analysis_run_id: int, role: str) -> str:
    """Build the artifact id as ``<repo-slug>-<role>-run-<analysis_run_id>``."""
    repo_slug = _slug(repository_name)
    return "{}-{}-run-{}".format(repo_slug, role, analysis_run_id)
def _slug(value: str) -> str:
return "-".join(
token for token in "".join(char.lower() if char.isalnum() else "-" for char in value).split("-") if token
)
def artifact_json(artifact: dict[str, Any]) -> str:
    """Render the artifact as 2-space-indented, key-sorted JSON with a trailing newline."""
    rendered = json.dumps(artifact, indent=2, sort_keys=True)
    return f"{rendered}\n"

View File

@@ -1,3 +1,5 @@
import json
import pytest
from repo_registry.cli import main
@@ -98,3 +100,34 @@ def test_rebuild_cli_refuses_destructive_all_without_confirm_all(tmp_path):
)
assert exc.value.code == 2
def test_export_assessment_cli_writes_completed_run_artifact(tmp_path):
    """The ``export-assessment`` command writes a JSON artifact for a completed run."""
    service = make_service(tmp_path)
    source = write_repo(tmp_path)
    repository = service.register_repository(name="CLI Export", url=str(source))
    summary = service.analyze_repository(repository.id, use_llm_assistance=False)
    run_id = summary.analysis_run.id
    output_path = tmp_path / "assessment.json"

    argv = ["export-assessment"]
    argv += ["--repo", str(repository.id)]
    argv += ["--analysis-run", str(run_id)]
    argv += ["--output", str(output_path)]
    argv += ["--database-path", str(tmp_path / "registry.sqlite3")]
    argv += ["--checkout-root", str(tmp_path / "checkouts")]

    exit_code = main(argv)

    artifact = json.loads(output_path.read_text(encoding="utf-8"))
    assert exit_code == 0
    assert artifact["target_repository"]["repo_slug"] == "cli-export"
    assert artifact["execution"]["analysis_run_id"] == run_id
    assert artifact["assessment"]["role"] == "challenger"
    assert artifact["generated_tree"]["abilities"]

View File

@@ -36,7 +36,11 @@ def test_self_scoping_assessment_schema_requires_release_binding_metadata():
"execution",
"assessment",
"fact_summary",
"content_chunk_summary",
"generated_tree",
"approved_map",
"review_decisions",
"quality_gate_outcomes",
"known_regression_patterns",
} <= required
assert {
@@ -79,6 +83,10 @@ def test_known_bad_self_scoping_artifact_captures_rejected_regression_seed():
assert "Route LLM Requests Across Providers" in capability_names
assert {"RREG-SELF-REG-001", "RREG-SELF-REG-002", "RREG-SELF-REG-003"} <= regression_ids
assert artifact["fact_summary"]["counts_by_kind"]["llm_provider"] == 41
assert "content_chunk_summary" in artifact
assert "approved_map" in artifact
assert artifact["review_decisions"][0]["action"] == "trusted_auto_approve_candidate_graph"
assert artifact["quality_gate_outcomes"] == []
def test_golden_profile_names_expected_native_capabilities_and_forbidden_false_positive():
@@ -109,4 +117,3 @@ def test_golden_profile_names_expected_native_capabilities_and_forbidden_false_p
assert profile["comparison_rules"]["must_not_have_native_capability_names"] == [
"Route LLM Requests Across Providers"
]

View File

@@ -0,0 +1,97 @@
from repo_registry.core.service import RegistryService
from repo_registry.repo_ingestion.git import GitIngestionService
from repo_registry.self_scoping.assessment import export_assessment_artifact
from repo_registry.storage.sqlite import RegistryStore
def make_service(tmp_path):
    """Create a registry service backed by a fresh SQLite store under ``tmp_path``."""
    database_path = tmp_path / "registry.sqlite3"
    store = RegistryStore(database_path)
    store.initialize()
    ingestion = GitIngestionService(tmp_path / "checkouts")
    return RegistryService(store, ingestion=ingestion)
def write_repo(tmp_path):
    """Create ``tmp_path / "repo"`` with a README and a tiny health endpoint module."""
    source = tmp_path / "repo"
    source.mkdir()
    files = {
        "README.md": "# Exportable Repo\n\nReports service health.\n",
        "app.py": (
            '@app.get("/health")\n'
            "def health():\n"
            " return {}\n"
        ),
    }
    for file_name, text in files.items():
        (source / file_name).write_text(text, encoding="utf-8")
    return source
def test_export_assessment_artifact_binds_analysis_to_engine_identity(tmp_path):
    """A deterministic run exports with engine identity bound and empty review data."""
    service = make_service(tmp_path)
    repository = service.register_repository(
        name="Exportable Repo",
        url=str(write_repo(tmp_path)),
    )
    summary = service.analyze_repository(repository.id, use_llm_assistance=False)

    artifact = export_assessment_artifact(
        service,
        repository.id,
        summary.analysis_run.id,
        role="challenger",
        outcome="challenger",
        reviewer="test",
    )

    target = artifact["target_repository"]
    engine = artifact["engine_identity"]

    assert artifact["schema_version"] == "self-scoping-assessment/v1"
    assert artifact["artifact_id"] == "exportable-repo-challenger-run-1"
    assert target["repo_slug"] == "exportable-repo"
    assert target["target_commit"]
    assert engine["engine_commit"]
    assert engine["release_binding_status"] == "complete"
    assert artifact["assessment"]["comparison_eligibility"] == "eligible"
    assert artifact["execution"]["mode"] == "deterministic-only"
    assert artifact["content_chunk_summary"]["total"] > 0
    assert artifact["generated_tree"]["abilities"]
    assert artifact["approved_map"]["abilities"] == []
    assert artifact["review_decisions"] == []
    assert artifact["quality_gate_outcomes"] == []
def test_export_assessment_artifact_flags_known_provider_regression(tmp_path):
    """Provider vocabulary triggers RREG-SELF-REG-001 without flagging providers.py as contamination."""
    service = make_service(tmp_path)
    source = tmp_path / "repo"
    source.mkdir()
    readme_path = source / "README.md"
    readme_path.write_text("# Provider Vocabulary\n", encoding="utf-8")
    providers_path = source / "providers.py"
    providers_path.write_text(
        "provider_registry = {'openrouter': OpenRouterAdapter}\n",
        encoding="utf-8",
    )
    repository = service.register_repository(
        name="Provider Vocabulary",
        url=str(source),
    )
    summary = service.analyze_repository(repository.id, use_llm_assistance=False)

    artifact = export_assessment_artifact(
        service,
        repository.id,
        summary.analysis_run.id,
        role="challenger",
        outcome="challenger",
        reviewer="test",
    )

    regression_ids = set()
    for item in artifact["known_regression_patterns"]:
        regression_ids.add(item["id"])
    contamination_paths = [
        item["path"] for item in artifact["fact_summary"]["contamination_sources"]
    ]

    assert "RREG-SELF-REG-001" in regression_ids
    assert "providers.py" not in contamination_paths

View File

@@ -139,7 +139,7 @@ the original analysis run did not record the engine commit.
```task
id: RREG-WP-0013-T04
status: todo
status: done
priority: high
state_hub_task_id: "51e01d45-7574-4c97-994d-dabb2bcf9a00"
```
@@ -156,6 +156,14 @@ Acceptance criteria:
incomplete.
- Export can target repo-scoping itself without requiring network access.
Implementation note 2026-05-15: added
`src/repo_registry/self_scoping/assessment.py` and the
`repo-scoping export-assessment` CLI command. The exporter reads an existing
completed analysis run, records engine identity, generated candidate tree,
approved map, fact/content summaries, review decisions, empty quality-gate
outcomes pending RREG-WP-0014, and known regression patterns. Focused tests cover
the exporter and CLI path.
## T05: Compare Baseline And Challenger Runs
```task