generated from coulomb/repo-seed
Add self-scoping assessment export command
This commit is contained in:
@@ -15,7 +15,11 @@
|
||||
"execution",
|
||||
"assessment",
|
||||
"fact_summary",
|
||||
"content_chunk_summary",
|
||||
"generated_tree",
|
||||
"approved_map",
|
||||
"review_decisions",
|
||||
"quality_gate_outcomes",
|
||||
"known_regression_patterns"
|
||||
],
|
||||
"properties": {
|
||||
@@ -48,9 +52,27 @@
|
||||
"fact_summary": {
|
||||
"$ref": "#/$defs/factSummary"
|
||||
},
|
||||
"content_chunk_summary": {
|
||||
"$ref": "#/$defs/contentChunkSummary"
|
||||
},
|
||||
"generated_tree": {
|
||||
"$ref": "#/$defs/generatedTree"
|
||||
},
|
||||
"approved_map": {
|
||||
"$ref": "#/$defs/approvedMap"
|
||||
},
|
||||
"review_decisions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/reviewDecision"
|
||||
}
|
||||
},
|
||||
"quality_gate_outcomes": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/qualityGateOutcome"
|
||||
}
|
||||
},
|
||||
"known_regression_patterns": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@@ -255,6 +277,37 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"contentChunkSummary": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["total", "counts_by_kind", "counts_by_source_role", "paths"],
|
||||
"properties": {
|
||||
"total": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
},
|
||||
"counts_by_kind": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
}
|
||||
},
|
||||
"counts_by_source_role": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
}
|
||||
},
|
||||
"paths": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"generatedTree": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
@@ -271,7 +324,7 @@
|
||||
"ability": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["name", "status", "primary_class", "capabilities"],
|
||||
"required": ["name", "status", "primary_class", "source_refs", "capabilities"],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
@@ -282,6 +335,12 @@
|
||||
"primary_class": {
|
||||
"type": "string"
|
||||
},
|
||||
"source_refs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/sourceRef"
|
||||
}
|
||||
},
|
||||
"capabilities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@@ -293,7 +352,14 @@
|
||||
"capability": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["name", "status", "primary_class", "features"],
|
||||
"required": [
|
||||
"name",
|
||||
"status",
|
||||
"primary_class",
|
||||
"source_refs",
|
||||
"features",
|
||||
"evidence"
|
||||
],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
@@ -304,18 +370,37 @@
|
||||
"primary_class": {
|
||||
"type": "string"
|
||||
},
|
||||
"source_refs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/sourceRef"
|
||||
}
|
||||
},
|
||||
"features": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/feature"
|
||||
}
|
||||
},
|
||||
"evidence": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/candidateEvidence"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"feature": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["name", "type", "status", "primary_class", "location"],
|
||||
"required": [
|
||||
"name",
|
||||
"type",
|
||||
"status",
|
||||
"primary_class",
|
||||
"location",
|
||||
"source_refs"
|
||||
],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
@@ -331,9 +416,104 @@
|
||||
},
|
||||
"location": {
|
||||
"type": "string"
|
||||
},
|
||||
"source_refs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/sourceRef"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"candidateEvidence": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["type", "reference", "strength", "status", "source_refs"],
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string"
|
||||
},
|
||||
"reference": {
|
||||
"type": "string"
|
||||
},
|
||||
"strength": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"source_refs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/sourceRef"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"sourceRef": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["fact_id", "path", "kind", "name", "line"],
|
||||
"properties": {
|
||||
"fact_id": {
|
||||
"type": ["integer", "null"]
|
||||
},
|
||||
"path": {
|
||||
"type": "string"
|
||||
},
|
||||
"kind": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"line": {
|
||||
"type": ["integer", "null"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"approvedMap": {
|
||||
"type": "object",
|
||||
"description": "Current approved ability map at export time.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"reviewDecision": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"repository_id",
|
||||
"analysis_run_id",
|
||||
"action",
|
||||
"notes",
|
||||
"created_at"
|
||||
],
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "integer"
|
||||
},
|
||||
"repository_id": {
|
||||
"type": "integer"
|
||||
},
|
||||
"analysis_run_id": {
|
||||
"type": ["integer", "null"]
|
||||
},
|
||||
"action": {
|
||||
"type": "string"
|
||||
},
|
||||
"notes": {
|
||||
"type": "string"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"qualityGateOutcome": {
|
||||
"type": "object",
|
||||
"description": "Versioned deterministic quality-gate outcome. Empty until RREG-WP-0014 introduces gates.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"regressionPattern": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
@@ -411,23 +591,35 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"content_chunk_summary": {
|
||||
"total": 0,
|
||||
"counts_by_kind": {},
|
||||
"counts_by_source_role": {},
|
||||
"paths": []
|
||||
},
|
||||
"generated_tree": {
|
||||
"abilities": [
|
||||
{
|
||||
"name": "Support Repo Registry",
|
||||
"status": "approved",
|
||||
"primary_class": "repository-intelligence",
|
||||
"source_refs": [],
|
||||
"capabilities": [
|
||||
{
|
||||
"name": "Route LLM Requests Across Providers",
|
||||
"status": "approved",
|
||||
"primary_class": "llm-integration",
|
||||
"features": []
|
||||
"source_refs": [],
|
||||
"features": [],
|
||||
"evidence": []
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"approved_map": {},
|
||||
"review_decisions": [],
|
||||
"quality_gate_outcomes": [],
|
||||
"known_regression_patterns": [
|
||||
{
|
||||
"id": "RREG-SELF-REG-001",
|
||||
|
||||
@@ -33,3 +33,18 @@ assessment. Compare the challenger to the golden profile and to the negative
|
||||
seed. Reviewers should be able to choose whether the old result, new result, or
|
||||
neither is better, then store that judgement as a new assessment outcome.
|
||||
|
||||
## Export Command
|
||||
|
||||
Export a completed analysis run as a challenger artifact:
|
||||
|
||||
```bash
|
||||
repo-scoping export-assessment \
|
||||
--repo repo-scoping \
|
||||
--analysis-run 39 \
|
||||
--output docs/self-scoping/assessments/repo-scoping-challenger-run-39.json
|
||||
```
|
||||
|
||||
The command reads an existing registry database and does not clone or scan the
|
||||
target repository. It records the target analysis metadata, candidate graph,
|
||||
approved map at export time, review decisions, fact and content summaries, known
|
||||
regression patterns, and current repo-scoping engine identity.
|
||||
|
||||
@@ -91,94 +91,125 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"content_chunk_summary": {
|
||||
"total": 0,
|
||||
"counts_by_kind": {},
|
||||
"counts_by_source_role": {},
|
||||
"paths": []
|
||||
},
|
||||
"generated_tree": {
|
||||
"abilities": [
|
||||
{
|
||||
"name": "Support Repo Registry",
|
||||
"status": "approved",
|
||||
"primary_class": "repository-intelligence",
|
||||
"source_refs": [],
|
||||
"capabilities": [
|
||||
{
|
||||
"name": "Route LLM Requests Across Providers",
|
||||
"status": "approved",
|
||||
"primary_class": "llm-integration",
|
||||
"source_refs": [],
|
||||
"features": [
|
||||
{
|
||||
"name": "Use Anthropic Models",
|
||||
"type": "integration",
|
||||
"status": "approved",
|
||||
"primary_class": "integration",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "Use Claude Models",
|
||||
"type": "integration",
|
||||
"status": "approved",
|
||||
"primary_class": "integration",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "Use Gemini Models",
|
||||
"type": "integration",
|
||||
"status": "approved",
|
||||
"primary_class": "integration",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "Use OpenAI Models",
|
||||
"type": "integration",
|
||||
"status": "approved",
|
||||
"primary_class": "integration",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "Use OpenRouter Models",
|
||||
"type": "integration",
|
||||
"status": "approved",
|
||||
"primary_class": "integration",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "Configure LLM Provider Credentials",
|
||||
"type": "configuration",
|
||||
"status": "approved",
|
||||
"primary_class": "configuration",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "Maintain LLM Provider Registry",
|
||||
"type": "backend",
|
||||
"status": "approved",
|
||||
"primary_class": "backend",
|
||||
"location": "src/repo_registry/repo_scanning/scanner.py"
|
||||
"location": "src/repo_registry/repo_scanning/scanner.py",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "Apply LLM Provider Fallback Policy",
|
||||
"type": "backend",
|
||||
"status": "approved",
|
||||
"primary_class": "backend",
|
||||
"location": "src/repo_registry/repo_scanning/scanner.py"
|
||||
"location": "src/repo_registry/repo_scanning/scanner.py",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "HTTP API surface: possible API surface, GET /health, @app.post(, +43 more",
|
||||
"type": "API",
|
||||
"status": "approved",
|
||||
"primary_class": "API",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
},
|
||||
{
|
||||
"name": "CLI command surface: CLI command build_parser, CLI command make_service",
|
||||
"type": "CLI",
|
||||
"status": "approved",
|
||||
"primary_class": "CLI",
|
||||
"location": "multiple files"
|
||||
"location": "multiple files",
|
||||
"source_refs": []
|
||||
}
|
||||
]
|
||||
],
|
||||
"evidence": []
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"approved_map": {},
|
||||
"review_decisions": [
|
||||
{
|
||||
"id": 21,
|
||||
"repository_id": 16,
|
||||
"analysis_run_id": 39,
|
||||
"action": "trusted_auto_approve_candidate_graph",
|
||||
"notes": "Trusted auto-populate mode reviewed candidate graph after deterministic candidate generation. Auto-approved 1 safe candidate capability(s); left 0 for review. Approved: Route LLM Requests Across Providers: eligible LLM utility relationship with source support.",
|
||||
"created_at": "2026-05-15 09:28:49"
|
||||
}
|
||||
],
|
||||
"quality_gate_outcomes": [],
|
||||
"known_regression_patterns": [
|
||||
{
|
||||
"id": "RREG-SELF-REG-001",
|
||||
|
||||
@@ -8,6 +8,7 @@ from repo_registry.core.models import CharacteristicRebuildResult, Repository
|
||||
from repo_registry.core.service import RegistryService
|
||||
from repo_registry.llm_extraction import LLMCandidateExtractor, create_llm_connect_adapter
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.self_scoping.assessment import artifact_json, export_assessment_artifact
|
||||
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
||||
from repo_registry.web_api.app import Settings
|
||||
|
||||
@@ -44,6 +45,37 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
)
|
||||
rebuild.add_argument("--database-path", help="Override REPO_REGISTRY_DATABASE_PATH.")
|
||||
rebuild.add_argument("--checkout-root", help="Override REPO_REGISTRY_CHECKOUT_ROOT.")
|
||||
export = subparsers.add_parser(
|
||||
"export-assessment",
|
||||
help="Export a completed analysis run as a self-scoping assessment artifact.",
|
||||
)
|
||||
export.add_argument("--repo", required=True, help="Repository id or exact repository name.")
|
||||
export.add_argument("--analysis-run", type=int, required=True, help="Completed analysis run id.")
|
||||
export.add_argument("--output", help="Write artifact JSON to this path instead of stdout.")
|
||||
export.add_argument(
|
||||
"--role",
|
||||
choices=["baseline", "challenger", "negative_regression_seed"],
|
||||
default="challenger",
|
||||
help="Assessment artifact role.",
|
||||
)
|
||||
export.add_argument(
|
||||
"--outcome",
|
||||
choices=[
|
||||
"baseline",
|
||||
"challenger",
|
||||
"preferred",
|
||||
"tied",
|
||||
"rejected",
|
||||
"superseded",
|
||||
"needs-human",
|
||||
],
|
||||
default="challenger",
|
||||
help="Initial assessment outcome.",
|
||||
)
|
||||
export.add_argument("--reviewer", default="codex", help="Reviewer name recorded in the artifact.")
|
||||
export.add_argument("--summary", help="Assessment summary override.")
|
||||
export.add_argument("--database-path", help="Override REPO_REGISTRY_DATABASE_PATH.")
|
||||
export.add_argument("--checkout-root", help="Override REPO_REGISTRY_CHECKOUT_ROOT.")
|
||||
return parser
|
||||
|
||||
|
||||
@@ -52,6 +84,8 @@ def main(argv: Sequence[str] | None = None) -> int:
|
||||
args = parser.parse_args(argv)
|
||||
if args.command == "rebuild-characteristics":
|
||||
return rebuild_characteristics_command(args, parser)
|
||||
if args.command == "export-assessment":
|
||||
return export_assessment_command(args, parser)
|
||||
parser.error(f"unknown command: {args.command}")
|
||||
return 2
|
||||
|
||||
@@ -88,6 +122,38 @@ def rebuild_characteristics_command(
|
||||
return 0
|
||||
|
||||
|
||||
def export_assessment_command(
    args: argparse.Namespace,
    parser: argparse.ArgumentParser,
) -> int:
    """Handle the ``export-assessment`` subcommand.

    Resolves the target repository, builds the assessment artifact for the
    requested analysis run, and writes it as JSON either to ``--output`` or to
    stdout.

    Args:
        args: Parsed CLI arguments. This function reads ``analysis_run``,
            ``role``, ``outcome``, ``reviewer``, ``summary`` and ``output``;
            ``service_from_args`` and ``selected_repositories`` read the rest.
        parser: Parser used to report usage errors. ``parser.error`` prints
            the message and exits the process, so code after such a call is
            not reached.

    Returns:
        ``0`` on success.
    """
    service = service_from_args(args)
    repositories = selected_repositories(service, args)
    if not repositories:
        parser.error("no repositories matched the requested target")
    if len(repositories) > 1:
        parser.error("assessment export requires exactly one repository")
    repository = repositories[0]
    try:
        artifact = export_assessment_artifact(
            service,
            repository.id,
            args.analysis_run,
            role=args.role,
            outcome=args.outcome,
            reviewer=args.reviewer,
            summary=args.summary,
        )
    except (NotFoundError, ValueError) as exc:
        # Unknown ids and non-completed runs are user errors, not crashes.
        parser.error(str(exc))

    content = artifact_json(artifact)
    if args.output:
        output_path = Path(args.output)
        # The documented usage writes into a nested assessments directory;
        # create missing parents so the export does not fail with
        # FileNotFoundError on a fresh checkout.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(content, encoding="utf-8")
    else:
        # artifact_json already terminates the document with a newline.
        print(content, end="")
    return 0
|
||||
|
||||
|
||||
def service_from_args(args: argparse.Namespace) -> RegistryService:
|
||||
settings = Settings()
|
||||
database_path = Path(args.database_path or settings.database_path)
|
||||
@@ -96,7 +162,8 @@ def service_from_args(args: argparse.Namespace) -> RegistryService:
|
||||
store = RegistryStore(database_path)
|
||||
store.initialize()
|
||||
llm_extractor = None
|
||||
if not args.no_llm and settings.llm_enabled and settings.llm_provider:
|
||||
no_llm = getattr(args, "no_llm", True)
|
||||
if not no_llm and settings.llm_enabled and settings.llm_provider:
|
||||
adapter = create_llm_connect_adapter(settings.llm_provider, model=settings.llm_model)
|
||||
llm_extractor = LLMCandidateExtractor(adapter)
|
||||
return RegistryService(
|
||||
@@ -111,7 +178,7 @@ def selected_repositories(
|
||||
args: argparse.Namespace,
|
||||
) -> list[Repository]:
|
||||
repositories = service.list_repositories()
|
||||
if args.all:
|
||||
if getattr(args, "all", False):
|
||||
return repositories
|
||||
repo = str(args.repo)
|
||||
if repo.isdigit():
|
||||
|
||||
3
src/repo_registry/self_scoping/__init__.py
Normal file
3
src/repo_registry/self_scoping/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from repo_registry.self_scoping.assessment import export_assessment_artifact
|
||||
|
||||
__all__ = ["export_assessment_artifact"]
|
||||
462
src/repo_registry/self_scoping/assessment.py
Normal file
462
src/repo_registry/self_scoping/assessment.py
Normal file
@@ -0,0 +1,462 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from collections import Counter
|
||||
from dataclasses import asdict
|
||||
from datetime import UTC, datetime
|
||||
from importlib import metadata
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from repo_registry.core.models import (
|
||||
Ability,
|
||||
CandidateAbility,
|
||||
CandidateCapability,
|
||||
CandidateEvidence,
|
||||
CandidateFeature,
|
||||
ContentChunk,
|
||||
ObservedFact,
|
||||
RepositoryAbilityMap,
|
||||
ReviewDecision,
|
||||
SourceReference,
|
||||
)
|
||||
from repo_registry.core.service import RegistryService
|
||||
|
||||
|
||||
SCHEMA_VERSION = "self-scoping-assessment/v1"
|
||||
KNOWN_PROVIDER_ROUTING_CAPABILITY = "Route LLM Requests Across Providers"
|
||||
|
||||
|
||||
def export_assessment_artifact(
    service: RegistryService,
    repository_id: int,
    analysis_run_id: int,
    *,
    role: str = "challenger",
    outcome: str = "challenger",
    reviewer: str = "codex",
    summary: str | None = None,
    engine_root: str | Path | None = None,
) -> dict[str, Any]:
    """Build the self-scoping assessment artifact for one completed analysis run.

    Args:
        service: Registry service used to load the repository, the analysis
            run, its snapshot, facts, content chunks, candidate graph,
            approved ability map and review decisions.
        repository_id: Id of the repository that owns the run.
        analysis_run_id: Id of the analysis run to export.
        role: Assessment role recorded in the artifact.
        outcome: Initial assessment outcome recorded in the artifact.
        reviewer: Reviewer name recorded in the artifact.
        summary: Optional summary text; when falsy a summary is derived from
            the role and the detected regression patterns.
        engine_root: Directory inspected with git for the engine identity.
            Falls back to the current working directory when falsy.

    Returns:
        A JSON-serializable dict describing the assessment.

    Raises:
        ValueError: If the analysis run's status is not ``"completed"``.
    """
    repository = service.get_repository(repository_id)
    run = service.get_analysis_run(repository_id, analysis_run_id)
    if run.status != "completed":
        raise ValueError(
            f"analysis run {analysis_run_id} is {run.status}, not completed"
        )

    # A run may have no snapshot; every snapshot-derived field then falls back.
    snapshot = None
    if run.snapshot_id is not None:
        snapshot = service.store.get_snapshot(run.snapshot_id)

    facts = service.list_observed_facts(repository_id, analysis_run_id)
    chunks = service.list_content_chunks(repository_id, analysis_run_id)
    graph = service.candidate_graph(repository_id, analysis_run_id)
    ability_map = service.ability_map(repository_id)
    decisions = service.list_review_decisions(repository_id, analysis_run_id)

    engine_identity = _engine_identity(
        run.scanner_version,
        Path(engine_root or Path.cwd()),
    )
    regression_patterns = _known_regression_patterns(graph.abilities, decisions)
    eligibility = _comparison_eligibility(
        role,
        engine_identity["release_binding_status"],
    )
    artifact_summary = summary or _summary(role, regression_patterns)

    artifact_id = _artifact_id(repository.name, analysis_run_id, role)
    # Second-resolution UTC timestamp rendered with a trailing "Z".
    created_at = (
        datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
    )

    if snapshot is None:
        target_repository = {
            "repo_slug": _slug(repository.name),
            "repository_id": repository.id,
            "source": repository.url,
            "target_commit": "unknown",
            "target_branch": repository.branch,
            "dirty_state": "unknown",
            "file_count": None,
        }
    else:
        target_repository = {
            "repo_slug": _slug(repository.name),
            "repository_id": repository.id,
            "source": snapshot.source_path,
            "target_commit": snapshot.commit_hash,
            "target_branch": snapshot.branch,
            "dirty_state": _dirty_state(Path(snapshot.source_path)),
            "file_count": snapshot.file_count,
        }

    if eligibility == "not_comparable":
        comparability_note = (
            "Artifact is not comparable as a preferred baseline until engine "
            "identity is complete."
        )
    else:
        comparability_note = (
            "Artifact has enough engine identity metadata for comparison."
        )

    return {
        "schema_version": SCHEMA_VERSION,
        "artifact_id": artifact_id,
        "artifact_type": "assessment_run",
        "created_at": created_at,
        "target_repository": target_repository,
        "engine_identity": engine_identity,
        "execution": {
            "mode": _execution_mode(decisions),
            "analysis_run_id": run.id,
            "candidate_source": _candidate_source(decisions),
            "acceptance_mode": _acceptance_mode(decisions),
            "started_at": _timestamp(run.started_at),
            "completed_at": _timestamp(run.completed_at),
        },
        "assessment": {
            "role": role,
            "outcome": outcome,
            "summary": artifact_summary,
            "reviewer": reviewer,
            "comparison_eligibility": eligibility,
            "rationale": _rationale(regression_patterns, eligibility),
        },
        "fact_summary": _fact_summary(facts),
        "content_chunk_summary": _content_chunk_summary(chunks),
        "generated_tree": {
            "abilities": [_candidate_ability(item) for item in graph.abilities]
        },
        "approved_map": _approved_map(ability_map),
        "review_decisions": [_review_decision(item) for item in decisions],
        # No quality gates are evaluated by this exporter; the list is always empty.
        "quality_gate_outcomes": [],
        "known_regression_patterns": regression_patterns,
        "notes": [
            "Generated by repo-scoping self-scoping assessment exporter.",
            comparability_note,
        ],
    }
|
||||
|
||||
|
||||
def _engine_identity(scanner_version: str, engine_root: Path) -> dict[str, Any]:
    """Describe the engine that produced the run, using git metadata at ``engine_root``.

    The binding status is ``"complete"`` only when a HEAD commit could be read
    from git; otherwise it is ``"unbound"``.

    Args:
        scanner_version: Scanner version recorded on the analysis run.
        engine_root: Directory passed to ``git -C`` for the lookups.

    Returns:
        Dict of engine identity fields for the artifact.
    """
    engine_commit = _git_value(engine_root, "rev-parse", "HEAD")
    dirty_state = _dirty_state(engine_root)
    # Only resolves when HEAD sits exactly on a tag; otherwise None.
    release = _git_value(engine_root, "describe", "--tags", "--exact-match")

    if engine_commit:
        binding_status = "complete"
        binding_note = "Engine commit was captured from git."
    else:
        binding_status = "unbound"
        binding_note = (
            "Engine commit could not be captured; artifact is not comparable."
        )

    identity: dict[str, Any] = {
        "repo_scoping_version": _package_version(),
        "engine_commit": engine_commit,
        "engine_release": release,
        "engine_dirty_state": dirty_state,
        "scanner_version": scanner_version,
        "candidate_generator_version": "unversioned",
        "quality_criteria_version": "none",
        "prompt_version": None,
        "release_binding_status": binding_status,
        "release_binding_note": binding_note,
    }
    return identity
|
||||
|
||||
|
||||
def _package_version() -> str:
    """Return the installed ``repo-registry`` distribution version.

    Returns:
        The version string, or ``"unknown"`` when the distribution metadata
        cannot be found.
    """
    try:
        installed = metadata.version("repo-registry")
    except metadata.PackageNotFoundError:
        installed = "unknown"
    return installed
|
||||
|
||||
|
||||
def _git_value(root: Path, *args: str) -> str | None:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "-C", str(root), *args],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
except OSError:
|
||||
return None
|
||||
value = result.stdout.strip()
|
||||
return value if result.returncode == 0 and value else None
|
||||
|
||||
|
||||
def _dirty_state(root: Path) -> str:
    """Classify the git working tree at ``root``.

    Args:
        root: Directory expected to contain a ``.git`` entry.

    Returns:
        ``"dirty"`` when ``git status --short`` prints anything, ``"clean"``
        when it prints nothing, and ``"unknown"`` when ``root`` has no
        ``.git`` entry, git cannot be launched, or git exits non-zero.
    """
    git_marker = root / ".git"
    if not git_marker.exists():
        return "unknown"
    try:
        status = subprocess.run(
            ["git", "-C", str(root), "status", "--short"],
            check=False,
            capture_output=True,
            text=True,
        )
    except OSError:
        return "unknown"
    if status.returncode != 0:
        return "unknown"
    has_changes = bool(status.stdout.strip())
    if has_changes:
        return "dirty"
    return "clean"
|
||||
|
||||
|
||||
def _comparison_eligibility(role: str, release_binding_status: str) -> str:
    """Decide whether an artifact may take part in comparisons.

    Negative regression seeds are always eligible as negative seeds; any other
    role is eligible only when the engine release binding is complete.

    Args:
        role: Assessment artifact role.
        release_binding_status: Binding status from the engine identity.

    Returns:
        ``"eligible_as_negative_seed"``, ``"eligible"`` or ``"not_comparable"``.
    """
    if role == "negative_regression_seed":
        return "eligible_as_negative_seed"
    return "eligible" if release_binding_status == "complete" else "not_comparable"
|
||||
|
||||
|
||||
def _summary(role: str, regression_patterns: list[dict[str, str]]) -> str:
    """Pick the default one-line assessment summary.

    Args:
        role: Assessment artifact role.
        regression_patterns: Known regression patterns detected for the run.

    Returns:
        The seed message for negative regression seeds, the regression message
        when any pattern was detected, and the generic message otherwise.
    """
    if role == "negative_regression_seed":
        text = "Historical run captured as a negative self-scoping regression seed."
    elif regression_patterns:
        text = "Generated self-scoping assessment repeats known regression patterns."
    else:
        text = "Generated self-scoping assessment artifact for comparison."
    return text
|
||||
|
||||
|
||||
def _rationale(
    regression_patterns: list[dict[str, str]],
    comparison_eligibility: str,
) -> list[str]:
    """Assemble the rationale lines recorded with the assessment.

    Args:
        regression_patterns: Detected patterns; each contributes one
            ``"<id>: <description>"`` line, in order.
        comparison_eligibility: Eligibility verdict; ``"not_comparable"``
            puts an engine-identity warning first.

    Returns:
        The rationale lines, possibly empty.
    """
    lines: list[str] = []
    if comparison_eligibility == "not_comparable":
        lines.append(
            "Engine identity is incomplete, so this cannot be a comparable baseline."
        )
    lines.extend(
        f"{entry['id']}: {entry['description']}" for entry in regression_patterns
    )
    return lines
|
||||
|
||||
|
||||
def _fact_summary(facts: list[ObservedFact]) -> dict[str, Any]:
    """Summarize observed facts for the artifact.

    Args:
        facts: Observed facts of the analysis run.

    Returns:
        Dict with per-kind fact counts (keys in sorted order) and the list of
        contamination sources found among the facts.
    """
    kind_counts = Counter(fact.kind for fact in facts)
    return {
        "counts_by_kind": {kind: kind_counts[kind] for kind in sorted(kind_counts)},
        "contamination_sources": _contamination_sources(facts),
    }
|
||||
|
||||
|
||||
def _contamination_sources(facts: list[ObservedFact]) -> list[dict[str, str]]:
    """List paths whose provider-related facts look like scanner/test contamination.

    A fact is flagged when its kind is provider-related and its lower-cased
    path contains one of the suspicious substrings. Each path is reported
    once.

    Args:
        facts: Observed facts; each is read for ``kind`` and ``path``.

    Returns:
        One ``{"path", "reason"}`` dict per flagged path, sorted by path.
    """
    provider_kinds = frozenset(
        (
            "llm_provider",
            "credential_config",
            "provider_registry",
            "fallback_policy",
        )
    )
    suspicious_segments = (
        "test",
        "tests/",
        "fixtures",
        "expectations",
        "schemas.py",
        "scanner.py",
        "normalization.py",
        "workplans/",
    )
    reason = (
        "Provider-related fact came from scanner rules, tests, fixtures, "
        "schemas, or workplan context and needs native-utility review."
    )

    flagged_paths: set[str] = set()
    for fact in facts:
        lowered = fact.path.lower()
        is_provider_fact = fact.kind in provider_kinds
        if is_provider_fact and any(part in lowered for part in suspicious_segments):
            flagged_paths.add(fact.path)

    return [{"path": path, "reason": reason} for path in sorted(flagged_paths)]
|
||||
|
||||
|
||||
def _content_chunk_summary(chunks: list[ContentChunk]) -> dict[str, Any]:
    """Summarize content chunks for the artifact.

    Args:
        chunks: Content chunks; each is read for ``kind``, ``path`` and the
            ``"source_role"`` entry of its ``metadata`` mapping.

    Returns:
        Dict with the chunk total, per-kind and per-source-role counts (keys
        in sorted order) and the sorted unique chunk paths. A missing or
        falsy source role is counted as ``"unknown"``.
    """
    role_counts: Counter[str] = Counter()
    kind_counts: Counter[Any] = Counter()
    unique_paths = set()
    for chunk in chunks:
        role = chunk.metadata.get("source_role", "") or "unknown"
        role_counts[str(role)] += 1
        kind_counts[chunk.kind] += 1
        unique_paths.add(chunk.path)

    return {
        "total": len(chunks),
        "counts_by_kind": {kind: kind_counts[kind] for kind in sorted(kind_counts)},
        "counts_by_source_role": {
            role: role_counts[role] for role in sorted(role_counts)
        },
        "paths": sorted(unique_paths),
    }
|
||||
|
||||
|
||||
def _candidate_ability(ability: CandidateAbility) -> dict[str, Any]:
    """Serialize a candidate ability and its capabilities for the generated tree.

    Args:
        ability: Candidate ability from the candidate graph.

    Returns:
        Dict with the ability's name, status, primary class, serialized
        source references and serialized capabilities.
    """
    references = [_source_ref(reference) for reference in ability.source_refs]
    capabilities = [_candidate_capability(item) for item in ability.capabilities]
    return {
        "name": ability.name,
        "status": ability.status,
        "primary_class": ability.primary_class,
        "source_refs": references,
        "capabilities": capabilities,
    }
|
||||
|
||||
|
||||
def _candidate_capability(capability: CandidateCapability) -> dict[str, Any]:
    """Serialize a candidate capability with its features and evidence.

    Args:
        capability: Candidate capability from the candidate graph.

    Returns:
        Dict with the capability's name, status, primary class, serialized
        source references, features and evidence.
    """
    references = [_source_ref(reference) for reference in capability.source_refs]
    features = [_candidate_feature(item) for item in capability.features]
    evidence = [_candidate_evidence(item) for item in capability.evidence]
    return {
        "name": capability.name,
        "status": capability.status,
        "primary_class": capability.primary_class,
        "source_refs": references,
        "features": features,
        "evidence": evidence,
    }
|
||||
|
||||
|
||||
def _candidate_feature(feature: CandidateFeature) -> dict[str, Any]:
    """Serialize a candidate feature for the generated tree.

    Args:
        feature: Candidate feature from the candidate graph.

    Returns:
        Dict with the feature's name, type, status, primary class, location
        and serialized source references.
    """
    references = [_source_ref(reference) for reference in feature.source_refs]
    return {
        "name": feature.name,
        "type": feature.type,
        "status": feature.status,
        "primary_class": feature.primary_class,
        "location": feature.location,
        "source_refs": references,
    }
|
||||
|
||||
|
||||
def _candidate_evidence(evidence: CandidateEvidence) -> dict[str, Any]:
    """Serialize one piece of candidate evidence for the generated tree.

    Args:
        evidence: Candidate evidence attached to a capability.

    Returns:
        Dict with the evidence type, reference, strength, status and
        serialized source references.
    """
    references = [_source_ref(reference) for reference in evidence.source_refs]
    return {
        "type": evidence.type,
        "reference": evidence.reference,
        "strength": evidence.strength,
        "status": evidence.status,
        "source_refs": references,
    }
|
||||
|
||||
|
||||
def _approved_map(ability_map: RepositoryAbilityMap) -> dict[str, Any]:
    """Serialize the approved ability map as it stands at export time.

    Args:
        ability_map: Approved map; ``scope`` is converted with
            ``dataclasses.asdict`` and each ability is serialized in turn.

    Returns:
        Dict with the ``"scope"`` and ``"abilities"`` of the map.
    """
    scope = asdict(ability_map.scope)
    abilities = [_approved_ability(item) for item in ability_map.abilities]
    return {"scope": scope, "abilities": abilities}
|
||||
|
||||
|
||||
def _approved_ability(ability: Ability) -> dict[str, Any]:
    """Serialize an approved ability with its capabilities, features and evidence.

    Args:
        ability: Approved ability from the repository ability map.

    Returns:
        Dict with the ability's name, primary class and nested capabilities.
        Evidence entries are converted with ``dataclasses.asdict``.
    """

    def feature_entry(feature: Any) -> dict[str, Any]:
        # One approved feature, including its serialized source references.
        return {
            "name": feature.name,
            "type": feature.type,
            "primary_class": feature.primary_class,
            "location": feature.location,
            "source_refs": [_source_ref(reference) for reference in feature.source_refs],
        }

    def capability_entry(capability: Any) -> dict[str, Any]:
        # One approved capability with its features and raw evidence dicts.
        return {
            "name": capability.name,
            "primary_class": capability.primary_class,
            "features": [feature_entry(item) for item in capability.features],
            "evidence": [asdict(item) for item in capability.evidence],
        }

    return {
        "name": ability.name,
        "primary_class": ability.primary_class,
        "capabilities": [capability_entry(item) for item in ability.capabilities],
    }
|
||||
|
||||
|
||||
def _source_ref(ref: SourceReference) -> dict[str, Any]:
    """Convert a source reference dataclass into a plain dict of its fields."""
    payload: dict[str, Any] = asdict(ref)
    return payload
|
||||
|
||||
|
||||
def _review_decision(decision: ReviewDecision) -> dict[str, Any]:
    """Convert a review decision dataclass into a plain dict of its fields."""
    payload: dict[str, Any] = asdict(decision)
    return payload
|
||||
|
||||
|
||||
def _known_regression_patterns(
    abilities: list[CandidateAbility],
    decisions: list[ReviewDecision],
) -> list[dict[str, str]]:
    """Detect the known self-scoping regression patterns in a run.

    Three checks are made, and matching patterns are returned in this order:

    * ``RREG-SELF-REG-001``: some capability is named like the known
      provider-routing capability.
    * ``RREG-SELF-REG-002``: such a capability carries an ``API`` or ``CLI``
      feature.
    * ``RREG-SELF-REG-003``: a review decision used trusted auto-approval.

    Args:
        abilities: Candidate abilities of the generated tree.
        decisions: Review decisions recorded for the run.

    Returns:
        One descriptive dict per detected pattern; empty when none match.
    """
    routing_capabilities = [
        candidate
        for parent in abilities
        for candidate in parent.capabilities
        if candidate.name == KNOWN_PROVIDER_ROUTING_CAPABILITY
    ]
    surface_types = {"API", "CLI"}
    has_surface_feature = any(
        item.type in surface_types
        for candidate in routing_capabilities
        for item in candidate.features
    )
    was_auto_approved = any(
        entry.action == "trusted_auto_approve_candidate_graph" for entry in decisions
    )

    detected: list[dict[str, str]] = []
    if routing_capabilities:
        detected.append(
            {
                "id": "RREG-SELF-REG-001",
                "title": "LLM provider vocabulary promoted as native capability",
                "severity": "critical",
                "description": (
                    "Generated tree contains Route LLM Requests Across Providers "
                    "as a repo-scoping capability."
                ),
                "detection_hint": (
                    "Flag the provider-routing capability unless product intent "
                    "and public implementation explicitly support it."
                ),
            }
        )
    if has_surface_feature:
        detected.append(
            {
                "id": "RREG-SELF-REG-002",
                "title": "Native API and CLI surfaces attached under false capability",
                "severity": "high",
                "description": (
                    "API or CLI surface features are nested below provider routing."
                ),
                "detection_hint": (
                    "Flag API/CLI surface features whose parent capability is "
                    "llm-integration or provider-routing."
                ),
            }
        )
    if was_auto_approved:
        detected.append(
            {
                "id": "RREG-SELF-REG-003",
                "title": "Deterministic trusted auto-approval accepted candidate truth",
                "severity": "high",
                "description": (
                    "Candidate characteristics were approved through trusted "
                    "auto-approval instead of human or agentic judgement."
                ),
                "detection_hint": "Flag trusted_auto_approve_candidate_graph review decisions.",
            }
        )
    return detected
|
||||
|
||||
|
||||
def _execution_mode(decisions: list[ReviewDecision]) -> str:
|
||||
if any(decision.action == "trusted_auto_approve_candidate_graph" for decision in decisions):
|
||||
return "trusted-auto-review"
|
||||
if any(decision.action == "llm_extraction_used" for decision in decisions):
|
||||
return "llm-assisted"
|
||||
if any(decision.action.startswith("approve") for decision in decisions):
|
||||
return "manual-review"
|
||||
return "deterministic-only"
|
||||
|
||||
|
||||
def _candidate_source(decisions: list[ReviewDecision]) -> str:
|
||||
return "llm+deterministic" if any(
|
||||
decision.action == "llm_extraction_used" for decision in decisions
|
||||
) else "deterministic"
|
||||
|
||||
|
||||
def _acceptance_mode(decisions: list[ReviewDecision]) -> str:
|
||||
if any(decision.action == "trusted_auto_approve_candidate_graph" for decision in decisions):
|
||||
return "trusted_auto_approve_candidate_graph"
|
||||
if any(decision.action == "approve_candidate_graph" for decision in decisions):
|
||||
return "manual_candidate_graph_approval"
|
||||
if any(decision.action == "approve_analysis_run_changes" for decision in decisions):
|
||||
return "manual_change_approval"
|
||||
return "pending_review"
|
||||
|
||||
|
||||
def _timestamp(value: str | None) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if "T" in value:
|
||||
return value
|
||||
return value.replace(" ", "T") + "Z"
|
||||
|
||||
|
||||
def _artifact_id(repository_name: str, analysis_run_id: int, role: str) -> str:
    """Compose the artifact id as ``<repo-slug>-<role>-run-<analysis_run_id>``."""
    repo_slug = _slug(repository_name)
    return f"{repo_slug}-{role}-run-{analysis_run_id}"
|
||||
|
||||
|
||||
def _slug(value: str) -> str:
|
||||
return "-".join(
|
||||
token for token in "".join(char.lower() if char.isalnum() else "-" for char in value).split("-") if token
|
||||
)
|
||||
|
||||
|
||||
def artifact_json(artifact: dict[str, Any]) -> str:
    """Serialise ``artifact`` as two-space-indented, key-sorted JSON text.

    The returned string always ends with a single trailing newline.
    """
    rendered = json.dumps(artifact, sort_keys=True, indent=2)
    return f"{rendered}\n"
|
||||
@@ -1,3 +1,5 @@
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from repo_registry.cli import main
|
||||
@@ -98,3 +100,34 @@ def test_rebuild_cli_refuses_destructive_all_without_confirm_all(tmp_path):
|
||||
)
|
||||
|
||||
assert exc.value.code == 2
|
||||
|
||||
|
||||
def test_export_assessment_cli_writes_completed_run_artifact(tmp_path):
    """Run ``export-assessment`` through the CLI and inspect the written file."""
    registry = make_service(tmp_path)
    repo_dir = write_repo(tmp_path)
    repo = registry.register_repository(name="CLI Export", url=str(repo_dir))
    analysis = registry.analyze_repository(repo.id, use_llm_assistance=False)
    destination = tmp_path / "assessment.json"

    # The CLI is pointed at the same database and checkout root that
    # make_service() used, so it can see the run created above.
    argv = [
        "export-assessment",
        "--repo",
        str(repo.id),
        "--analysis-run",
        str(analysis.analysis_run.id),
        "--output",
        str(destination),
        "--database-path",
        str(tmp_path / "registry.sqlite3"),
        "--checkout-root",
        str(tmp_path / "checkouts"),
    ]
    exit_code = main(argv)

    exported = json.loads(destination.read_text(encoding="utf-8"))
    assert exit_code == 0
    assert exported["target_repository"]["repo_slug"] == "cli-export"
    assert exported["execution"]["analysis_run_id"] == analysis.analysis_run.id
    assert exported["assessment"]["role"] == "challenger"
    assert exported["generated_tree"]["abilities"]
|
||||
|
||||
@@ -36,7 +36,11 @@ def test_self_scoping_assessment_schema_requires_release_binding_metadata():
|
||||
"execution",
|
||||
"assessment",
|
||||
"fact_summary",
|
||||
"content_chunk_summary",
|
||||
"generated_tree",
|
||||
"approved_map",
|
||||
"review_decisions",
|
||||
"quality_gate_outcomes",
|
||||
"known_regression_patterns",
|
||||
} <= required
|
||||
assert {
|
||||
@@ -79,6 +83,10 @@ def test_known_bad_self_scoping_artifact_captures_rejected_regression_seed():
|
||||
assert "Route LLM Requests Across Providers" in capability_names
|
||||
assert {"RREG-SELF-REG-001", "RREG-SELF-REG-002", "RREG-SELF-REG-003"} <= regression_ids
|
||||
assert artifact["fact_summary"]["counts_by_kind"]["llm_provider"] == 41
|
||||
assert "content_chunk_summary" in artifact
|
||||
assert "approved_map" in artifact
|
||||
assert artifact["review_decisions"][0]["action"] == "trusted_auto_approve_candidate_graph"
|
||||
assert artifact["quality_gate_outcomes"] == []
|
||||
|
||||
|
||||
def test_golden_profile_names_expected_native_capabilities_and_forbidden_false_positive():
|
||||
@@ -109,4 +117,3 @@ def test_golden_profile_names_expected_native_capabilities_and_forbidden_false_p
|
||||
assert profile["comparison_rules"]["must_not_have_native_capability_names"] == [
|
||||
"Route LLM Requests Across Providers"
|
||||
]
|
||||
|
||||
|
||||
97
tests/test_self_scoping_assessment_export.py
Normal file
97
tests/test_self_scoping_assessment_export.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from repo_registry.core.service import RegistryService
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.self_scoping.assessment import export_assessment_artifact
|
||||
from repo_registry.storage.sqlite import RegistryStore
|
||||
|
||||
|
||||
def make_service(tmp_path):
    """Build a RegistryService on an initialised store located under ``tmp_path``.

    The store file is ``registry.sqlite3`` and the ingestion service is given
    the ``checkouts`` directory, both inside ``tmp_path``.
    """
    database_path = tmp_path / "registry.sqlite3"
    checkout_root = tmp_path / "checkouts"
    registry_store = RegistryStore(database_path)
    registry_store.initialize()
    ingestion_service = GitIngestionService(checkout_root)
    return RegistryService(registry_store, ingestion=ingestion_service)
|
||||
|
||||
|
||||
def write_repo(tmp_path):
    """Create a two-file sample repository at ``tmp_path / "repo"``.

    Writes a README and an ``app.py`` containing a ``/health`` route, then
    returns the repository directory.
    """
    source = tmp_path / "repo"
    source.mkdir()
    file_contents = {
        "README.md": "# Exportable Repo\n\nReports service health.\n",
        "app.py": (
            '@app.get("/health")\n'
            "def health():\n"
            " return {}\n"
        ),
    }
    for file_name, text in file_contents.items():
        (source / file_name).write_text(text, encoding="utf-8")
    return source
|
||||
|
||||
|
||||
def test_export_assessment_artifact_binds_analysis_to_engine_identity(tmp_path):
    """Export a deterministic analysis run and check the artifact's fields."""
    registry = make_service(tmp_path)
    repo_dir = write_repo(tmp_path)
    repo = registry.register_repository(name="Exportable Repo", url=str(repo_dir))
    analysis = registry.analyze_repository(repo.id, use_llm_assistance=False)

    exported = export_assessment_artifact(
        registry,
        repo.id,
        analysis.analysis_run.id,
        role="challenger",
        outcome="challenger",
        reviewer="test",
    )

    assert exported["schema_version"] == "self-scoping-assessment/v1"
    assert exported["artifact_id"] == "exportable-repo-challenger-run-1"

    target = exported["target_repository"]
    assert target["repo_slug"] == "exportable-repo"
    assert target["target_commit"]

    engine = exported["engine_identity"]
    assert engine["engine_commit"]
    assert engine["release_binding_status"] == "complete"

    assert exported["assessment"]["comparison_eligibility"] == "eligible"
    assert exported["execution"]["mode"] == "deterministic-only"
    assert exported["content_chunk_summary"]["total"] > 0
    assert exported["generated_tree"]["abilities"]
    # Nothing was reviewed or gated in this test, so these stay empty.
    assert exported["approved_map"]["abilities"] == []
    assert exported["review_decisions"] == []
    assert exported["quality_gate_outcomes"] == []
|
||||
|
||||
|
||||
def test_export_assessment_artifact_flags_known_provider_regression(tmp_path):
    """Export a run over a provider-vocabulary repo and check the regression flag.

    The exported artifact must list regression pattern ``RREG-SELF-REG-001``
    and must not list ``providers.py`` among the fact summary's
    contamination sources.
    """
    service = make_service(tmp_path)
    source = tmp_path / "repo"
    source.mkdir()
    (source / "README.md").write_text("# Provider Vocabulary\n", encoding="utf-8")
    (source / "providers.py").write_text(
        "provider_registry = {'openrouter': OpenRouterAdapter}\n",
        encoding="utf-8",
    )
    repository = service.register_repository(
        name="Provider Vocabulary",
        url=str(source),
    )
    summary = service.analyze_repository(
        repository.id,
        use_llm_assistance=False,
    )

    artifact = export_assessment_artifact(
        service,
        repository.id,
        summary.analysis_run.id,
        role="challenger",
        outcome="challenger",
        reviewer="test",
    )

    regression_ids = {item["id"] for item in artifact["known_regression_patterns"]}
    assert "RREG-SELF-REG-001" in regression_ids
    # `not any(...)` replaces the former `any(...) is False` identity check.
    assert not any(
        item["path"] == "providers.py"
        for item in artifact["fact_summary"]["contamination_sources"]
    )
|
||||
@@ -139,7 +139,7 @@ the original analysis run did not record the engine commit.
|
||||
|
||||
```task
|
||||
id: RREG-WP-0013-T04
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "51e01d45-7574-4c97-994d-dabb2bcf9a00"
|
||||
```
|
||||
@@ -156,6 +156,14 @@ Acceptance criteria:
|
||||
incomplete.
|
||||
- Export can target repo-scoping itself without requiring network access.
|
||||
|
||||
Implementation note 2026-05-15: added
|
||||
`src/repo_registry/self_scoping/assessment.py` and the
|
||||
`repo-scoping export-assessment` CLI command. The exporter reads an existing
|
||||
completed analysis run, records engine identity, generated candidate tree,
|
||||
approved map, fact/content summaries, review decisions, empty quality-gate
|
||||
outcomes pending RREG-WP-0014, and known regression patterns. Focused tests cover
|
||||
the exporter and CLI path.
|
||||
|
||||
## T05: Compare Baseline And Challenger Runs
|
||||
|
||||
```task
|
||||
|
||||
Reference in New Issue
Block a user