Add self-scoping baseline workplans and artifacts

This commit is contained in:
2026-05-15 12:26:36 +02:00
parent a6e1e2f16a
commit 90bae27237
7 changed files with 1592 additions and 0 deletions

View File

@@ -0,0 +1,442 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://repo-scoping.local/schemas/self-scoping-assessment.schema.json",
"title": "Self-Scoping Assessment Artifact",
"description": "Immutable artifact used to compare repo-scoping self-analysis results across engine releases.",
"type": "object",
"additionalProperties": false,
"required": [
"schema_version",
"artifact_id",
"artifact_type",
"created_at",
"target_repository",
"engine_identity",
"execution",
"assessment",
"fact_summary",
"generated_tree",
"known_regression_patterns"
],
"properties": {
"schema_version": {
"const": "self-scoping-assessment/v1"
},
"artifact_id": {
"type": "string",
"description": "Stable artifact identifier."
},
"artifact_type": {
"enum": ["assessment_run"]
},
"created_at": {
"type": "string",
"format": "date-time"
},
"target_repository": {
"$ref": "#/$defs/targetRepository"
},
"engine_identity": {
"$ref": "#/$defs/engineIdentity"
},
"execution": {
"$ref": "#/$defs/execution"
},
"assessment": {
"$ref": "#/$defs/assessment"
},
"fact_summary": {
"$ref": "#/$defs/factSummary"
},
"generated_tree": {
"$ref": "#/$defs/generatedTree"
},
"known_regression_patterns": {
"type": "array",
"items": {
"$ref": "#/$defs/regressionPattern"
}
},
"notes": {
"type": "array",
"items": {
"type": "string"
}
}
},
"$defs": {
"targetRepository": {
"type": "object",
"additionalProperties": false,
"required": [
"repo_slug",
"repository_id",
"source",
"target_commit",
"target_branch",
"dirty_state",
"file_count"
],
"properties": {
"repo_slug": {
"type": "string"
},
"repository_id": {
"type": ["integer", "null"]
},
"source": {
"type": "string"
},
"target_commit": {
"type": "string"
},
"target_branch": {
"type": "string"
},
"dirty_state": {
"enum": ["clean", "dirty", "unknown"]
},
"file_count": {
"type": ["integer", "null"],
"minimum": 0
}
}
},
"engineIdentity": {
  "type": "object",
  "description": "Identity of the repo-scoping engine that produced the assessment. When release_binding_status is \"complete\", at least one of engine_commit or engine_release must be a non-empty string.",
  "additionalProperties": false,
  "required": [
    "repo_scoping_version",
    "engine_commit",
    "engine_release",
    "engine_dirty_state",
    "scanner_version",
    "candidate_generator_version",
    "quality_criteria_version",
    "prompt_version",
    "release_binding_status"
  ],
  "properties": {
    "repo_scoping_version": {
      "type": "string"
    },
    "engine_commit": {
      "type": ["string", "null"]
    },
    "engine_release": {
      "type": ["string", "null"]
    },
    "engine_dirty_state": {
      "enum": ["clean", "dirty", "unknown"]
    },
    "scanner_version": {
      "type": "string"
    },
    "candidate_generator_version": {
      "type": "string"
    },
    "quality_criteria_version": {
      "type": "string"
    },
    "prompt_version": {
      "type": ["string", "null"]
    },
    "release_binding_status": {
      "enum": ["complete", "historical_incomplete", "unbound"]
    },
    "release_binding_note": {
      "type": "string",
      "description": "Free-text explanation of the binding status, for example why a historical run could not be fully bound."
    }
  },
  "if": {
    "required": ["release_binding_status"],
    "properties": {
      "release_binding_status": {
        "const": "complete"
      }
    }
  },
  "then": {
    "anyOf": [
      {
        "required": ["engine_commit"],
        "properties": {
          "engine_commit": {
            "type": "string",
            "minLength": 1
          }
        }
      },
      {
        "required": ["engine_release"],
        "properties": {
          "engine_release": {
            "type": "string",
            "minLength": 1
          }
        }
      }
    ]
  }
},
"execution": {
"type": "object",
"additionalProperties": false,
"required": [
"mode",
"analysis_run_id",
"candidate_source",
"acceptance_mode"
],
"properties": {
"mode": {
"enum": [
"deterministic-only",
"llm-assisted",
"agent-reviewed",
"manual-review",
"trusted-auto-review",
"mixed"
]
},
"analysis_run_id": {
"type": ["integer", "null"]
},
"candidate_source": {
"type": "string"
},
"acceptance_mode": {
"type": "string"
},
"started_at": {
"type": ["string", "null"],
"format": "date-time"
},
"completed_at": {
"type": ["string", "null"],
"format": "date-time"
}
}
},
"assessment": {
"type": "object",
"additionalProperties": false,
"required": [
"role",
"outcome",
"summary",
"reviewer",
"comparison_eligibility"
],
"properties": {
"role": {
"enum": ["baseline", "challenger", "negative_regression_seed"]
},
"outcome": {
"enum": ["baseline", "challenger", "preferred", "tied", "rejected", "superseded", "needs-human"]
},
"summary": {
"type": "string"
},
"reviewer": {
"type": "string"
},
"comparison_eligibility": {
"enum": ["eligible", "eligible_as_negative_seed", "not_comparable"]
},
"rationale": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"factSummary": {
"type": "object",
"additionalProperties": false,
"required": ["counts_by_kind"],
"properties": {
"counts_by_kind": {
"type": "object",
"additionalProperties": {
"type": "integer",
"minimum": 0
}
},
"contamination_sources": {
"type": "array",
"items": {
"$ref": "#/$defs/contaminationSource"
}
}
}
},
"contaminationSource": {
"type": "object",
"additionalProperties": false,
"required": ["path", "reason"],
"properties": {
"path": {
"type": "string"
},
"reason": {
"type": "string"
}
}
},
"generatedTree": {
"type": "object",
"additionalProperties": false,
"required": ["abilities"],
"properties": {
"abilities": {
"type": "array",
"items": {
"$ref": "#/$defs/ability"
}
}
}
},
"ability": {
"type": "object",
"additionalProperties": false,
"required": ["name", "status", "primary_class", "capabilities"],
"properties": {
"name": {
"type": "string"
},
"status": {
"type": "string"
},
"primary_class": {
"type": "string"
},
"capabilities": {
"type": "array",
"items": {
"$ref": "#/$defs/capability"
}
}
}
},
"capability": {
"type": "object",
"additionalProperties": false,
"required": ["name", "status", "primary_class", "features"],
"properties": {
"name": {
"type": "string"
},
"status": {
"type": "string"
},
"primary_class": {
"type": "string"
},
"features": {
"type": "array",
"items": {
"$ref": "#/$defs/feature"
}
}
}
},
"feature": {
"type": "object",
"additionalProperties": false,
"required": ["name", "type", "status", "primary_class", "location"],
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string"
},
"status": {
"type": "string"
},
"primary_class": {
"type": "string"
},
"location": {
"type": "string"
}
}
},
"regressionPattern": {
"type": "object",
"additionalProperties": false,
"required": ["id", "title", "severity", "description", "detection_hint"],
"properties": {
"id": {
"type": "string"
},
"title": {
"type": "string"
},
"severity": {
"enum": ["low", "medium", "high", "critical"]
},
"description": {
"type": "string"
},
"detection_hint": {
"type": "string"
}
}
}
},
"examples": [
{
"schema_version": "self-scoping-assessment/v1",
"artifact_id": "repo-scoping-known-bad-2026-05-15-run-39",
"artifact_type": "assessment_run",
"created_at": "2026-05-15T09:28:48Z",
"target_repository": {
"repo_slug": "repo-scoping",
"repository_id": 16,
"source": "/home/worsch/repo-scoping/var/checkouts/repo-scoping-8a9c4168485c",
"target_commit": "00b57d509124789059639fedc724d9314edbb7b2",
"target_branch": "main",
"dirty_state": "unknown",
"file_count": 96
},
"engine_identity": {
"repo_scoping_version": "0.1.0",
"engine_commit": null,
"engine_release": null,
"engine_dirty_state": "unknown",
"scanner_version": "deterministic-v0.1",
"candidate_generator_version": "unversioned",
"quality_criteria_version": "none",
"prompt_version": null,
"release_binding_status": "historical_incomplete",
"release_binding_note": "Historical database run did not record engine commit."
},
"execution": {
"mode": "trusted-auto-review",
"analysis_run_id": 39,
"candidate_source": "deterministic",
"acceptance_mode": "trusted_auto_approve_candidate_graph",
"started_at": "2026-05-15T09:28:47Z",
"completed_at": "2026-05-15T09:28:48Z"
},
"assessment": {
"role": "negative_regression_seed",
"outcome": "rejected",
"summary": "Provider vocabulary was promoted into a false native LLM routing capability.",
"reviewer": "codex",
"comparison_eligibility": "eligible_as_negative_seed",
"rationale": ["The generated tree misclassified scanner vocabulary as product behavior."]
},
"fact_summary": {
"counts_by_kind": {
"llm_provider": 41
},
"contamination_sources": [
{
"path": "src/repo_registry/repo_scanning/scanner.py",
"reason": "Scanner rule vocabulary was treated as repo-owned capability evidence."
}
]
},
"generated_tree": {
"abilities": [
{
"name": "Support Repo Registry",
"status": "approved",
"primary_class": "repository-intelligence",
"capabilities": [
{
"name": "Route LLM Requests Across Providers",
"status": "approved",
"primary_class": "llm-integration",
"features": []
}
]
}
]
},
"known_regression_patterns": [
{
"id": "RREG-SELF-REG-001",
"title": "LLM provider vocabulary promoted as native capability",
"severity": "critical",
"description": "Scanner or fixture vocabulary becomes a repo-scoping product capability.",
"detection_hint": "Flag Route LLM Requests Across Providers when parented as a native repo-scoping capability."
}
]
}
]
}

View File

@@ -0,0 +1,35 @@
# Self-Scoping Assessment Artifacts
This directory contains repo-scoping's own baseline and assessment artifacts.
These files are meant to make scoping-engine changes comparable across releases
instead of relying on memory or screenshots.
## Artifact Types
- `golden/repo-scoping-golden-profile.v1.json` is the curated target profile for
repo-scoping itself.
- `assessments/repo-scoping-known-bad-2026-05-15-run-39.json` captures the
known-bad self-analysis that promoted LLM-provider vocabulary into native
repo-scoping capability truth.
- `../schemas/self-scoping-assessment.schema.json` defines the immutable
assessment-run artifact shape.
## Release Binding
Comparable assessment artifacts must bind generated results to the repo-scoping
engine release that produced them. A complete binding records package version,
engine git commit or release tag, dirty state, scanner version, candidate
generator version, quality criteria version, and prompt version when applicable.
The current known-bad artifact is marked `historical_incomplete` because the
original database run did not record the engine commit. It remains useful as a
negative regression seed, but future challenger artifacts should be fully bound
before they are accepted as comparable baselines.
## Review Use
When the engine changes, run repo-scoping against itself and export a challenger
assessment. Compare the challenger to the golden profile and to the negative
seed. Reviewers should be able to choose whether the old result, new result, or
neither is better, then store that judgement as a new assessment outcome.

View File

@@ -0,0 +1,209 @@
{
"schema_version": "self-scoping-assessment/v1",
"artifact_id": "repo-scoping-known-bad-2026-05-15-run-39",
"artifact_type": "assessment_run",
"created_at": "2026-05-15T09:28:48Z",
"target_repository": {
"repo_slug": "repo-scoping",
"repository_id": 16,
"source": "/home/worsch/repo-scoping/var/checkouts/repo-scoping-8a9c4168485c",
"target_commit": "00b57d509124789059639fedc724d9314edbb7b2",
"target_branch": "main",
"dirty_state": "unknown",
"file_count": 96
},
"engine_identity": {
"repo_scoping_version": "0.1.0",
"engine_commit": null,
"engine_release": null,
"engine_dirty_state": "unknown",
"scanner_version": "deterministic-v0.1",
"candidate_generator_version": "unversioned-pre-self-scoping-baseline",
"quality_criteria_version": "none",
"prompt_version": null,
"release_binding_status": "historical_incomplete",
"release_binding_note": "This historical database run recorded scanner version and target commit, but not the repo-scoping engine commit or release tag that generated the candidate graph."
},
"execution": {
"mode": "trusted-auto-review",
"analysis_run_id": 39,
"candidate_source": "deterministic",
"acceptance_mode": "trusted_auto_approve_candidate_graph",
"started_at": "2026-05-15T09:28:47Z",
"completed_at": "2026-05-15T09:28:48Z"
},
"assessment": {
"role": "negative_regression_seed",
"outcome": "rejected",
"summary": "The self-analysis promoted LLM-provider vocabulary into a false native repo-scoping capability and attached API/CLI features below it.",
"reviewer": "codex",
"comparison_eligibility": "eligible_as_negative_seed",
"rationale": [
"repo-scoping uses llm-connect as optional extraction infrastructure; it does not natively route LLM requests across providers.",
"Provider names came from scanner rules, normalization tokens, schema examples, tests, fixtures, and workplan text rather than product-facing provider-routing behavior.",
"The generated tree placed native API and CLI surfaces under the false LLM-provider capability, which makes the feature hierarchy misleading."
]
},
"fact_summary": {
"counts_by_kind": {
"config": 1,
"credential_config": 13,
"documentation": 14,
"fallback_policy": 10,
"framework": 2,
"intent": 1,
"interface": 127,
"language": 1,
"llm_provider": 41,
"manifest": 1,
"provider_registry": 7,
"scope": 1,
"test": 19
},
"contamination_sources": [
{
"path": "src/repo_registry/repo_scanning/scanner.py",
"reason": "Provider detector constants, credential hint constants, and fallback/provider-registry scanner logic were treated as repo-owned LLM routing evidence."
},
{
"path": "src/repo_registry/candidate_graph/normalization.py",
"reason": "Provider names used as distinctive candidate-normalization tokens were treated as implementation evidence for provider support."
},
{
"path": "src/repo_registry/web_api/schemas.py",
"reason": "An OpenRouter example in an expectation-gap schema was treated as provider evidence."
},
{
"path": "tests/expectations/llm_connect_provider_expectations.json",
"reason": "A fixture describing llm-connect expectations was treated as repo-scoping product behavior."
},
{
"path": "tests/fixtures.py",
"reason": "Regression fixture vocabulary was treated as native repo-scoping capability evidence."
},
{
"path": "tests/test_candidate_graph.py",
"reason": "Unit-test examples for LLM-provider detection were treated as product evidence."
},
{
"path": "tests/test_repository_scanner.py",
"reason": "Scanner tests for provider facts were treated as native provider-routing evidence."
}
]
},
"generated_tree": {
"abilities": [
{
"name": "Support Repo Registry",
"status": "approved",
"primary_class": "repository-intelligence",
"capabilities": [
{
"name": "Route LLM Requests Across Providers",
"status": "approved",
"primary_class": "llm-integration",
"features": [
{
"name": "Use Anthropic Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
},
{
"name": "Use Claude Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
},
{
"name": "Use Gemini Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
},
{
"name": "Use OpenAI Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
},
{
"name": "Use OpenRouter Models",
"type": "integration",
"status": "approved",
"primary_class": "integration",
"location": "multiple files"
},
{
"name": "Configure LLM Provider Credentials",
"type": "configuration",
"status": "approved",
"primary_class": "configuration",
"location": "multiple files"
},
{
"name": "Maintain LLM Provider Registry",
"type": "backend",
"status": "approved",
"primary_class": "backend",
"location": "src/repo_registry/repo_scanning/scanner.py"
},
{
"name": "Apply LLM Provider Fallback Policy",
"type": "backend",
"status": "approved",
"primary_class": "backend",
"location": "src/repo_registry/repo_scanning/scanner.py"
},
{
"name": "HTTP API surface: possible API surface, GET /health, @app.post(, +43 more",
"type": "API",
"status": "approved",
"primary_class": "API",
"location": "multiple files"
},
{
"name": "CLI command surface: CLI command build_parser, CLI command make_service",
"type": "CLI",
"status": "approved",
"primary_class": "CLI",
"location": "multiple files"
}
]
}
]
}
]
},
"known_regression_patterns": [
{
"id": "RREG-SELF-REG-001",
"title": "LLM provider vocabulary promoted as native capability",
"severity": "critical",
"description": "Scanner, normalization, schema, fixture, test, or workplan vocabulary becomes the native repo-scoping capability Route LLM Requests Across Providers.",
"detection_hint": "Flag any top-level/native repo-scoping capability named Route LLM Requests Across Providers unless product intent and public implementation explicitly show provider routing as a repo-scoping feature."
},
{
"id": "RREG-SELF-REG-002",
"title": "Native API and CLI surfaces attached under false capability",
"severity": "high",
"description": "General repo-scoping API/CLI interface features are nested below a capability they do not support.",
"detection_hint": "Flag API or CLI surface features when their parent capability is llm-integration or provider-routing."
},
{
"id": "RREG-SELF-REG-003",
"title": "Deterministic trusted auto-approval accepted candidate truth",
"severity": "high",
"description": "A deterministic rule path approves candidate characteristics without human or agentic judgement.",
"detection_hint": "Flag trusted_auto_approve_candidate_graph review decisions in self-scoping assessment artifacts."
}
],
"notes": [
"This artifact is a negative regression seed, not a desirable baseline.",
"The historical run is useful for pattern detection but is not fully release-bound because the engine commit was not recorded in the original analysis metadata."
]
}

View File

@@ -0,0 +1,311 @@
{
"schema_version": "self-scoping-golden-profile/v1",
"profile_id": "repo-scoping-golden-profile-v1",
"repo_slug": "repo-scoping",
"status": "active",
"created_at": "2026-05-15",
"updated_at": "2026-05-15",
"curation": {
"curator": "codex",
"workplan_id": "RREG-WP-0013",
"summary": "Curated target profile for evaluating repo-scoping self-analysis quality."
},
"ability": {
"name": "Map Repositories Into Reviewable Scope Profiles",
"primary_class": "repository-intelligence",
"attributes": [
"capability-mapping",
"source-linked-review",
"scope-generation"
],
"description": "repo-scoping turns repository source, documentation, and review decisions into source-linked maps of repository utility.",
"expected_capabilities": [
{
"name": "Register And Track Repositories",
"primary_class": "ingestion",
"attributes": ["metadata", "git", "analysis-run"],
"expected_features": [
{
"name": "Create and update repository records",
"primary_class": "api",
"source_paths": [
"src/repo_registry/core/service.py",
"src/repo_registry/web_api/app.py",
"src/repo_registry/web_ui/views.py"
]
},
{
"name": "Resolve local or remote Git checkouts",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/repo_ingestion/git.py",
"tests/test_git_ingestion.py"
]
},
{
"name": "Import repository metadata",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/repo_ingestion/metadata.py",
"tests/test_repository_metadata.py"
]
}
]
},
{
"name": "Scan Repositories Into Observed Facts",
"primary_class": "analysis",
"attributes": ["deterministic", "facts", "provenance"],
"expected_features": [
{
"name": "Detect source languages, manifests, docs, tests, config, and interfaces",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/repo_scanning/scanner.py",
"tests/test_repository_scanner.py"
]
},
{
"name": "Classify source roles for facts",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/repo_scanning/scanner.py",
"docs/characteristic-evidence-model.md"
]
},
{
"name": "Preserve analysis snapshots and fact records",
"primary_class": "storage",
"source_paths": [
"src/repo_registry/storage/sqlite.py",
"migrations/0001_initial.sql"
]
}
]
},
{
"name": "Index Source Content With Provenance",
"primary_class": "analysis",
"attributes": ["content-chunks", "source-role"],
"expected_features": [
{
"name": "Create source-linked content chunks from observed facts",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/content_indexing/extractor.py",
"tests/test_content_indexing.py"
]
},
{
"name": "Carry source-role metadata into downstream generation",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/content_indexing/extractor.py",
"src/repo_registry/llm_extraction/extractor.py"
]
}
]
},
{
"name": "Generate Reviewable Candidate Characteristics",
"primary_class": "analysis",
"attributes": ["candidate-graph", "review-required"],
"expected_features": [
{
"name": "Build candidate abilities, capabilities, features, and evidence",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/candidate_graph/generator.py",
"src/repo_registry/candidate_graph/normalization.py",
"tests/test_candidate_graph.py"
]
},
{
"name": "Optionally map structured LLM extraction into candidates",
"primary_class": "integration",
"source_paths": [
"src/repo_registry/llm_extraction/extractor.py",
"src/repo_registry/llm_extraction/mapper.py",
"tests/test_llm_extraction.py"
]
}
]
},
{
"name": "Review And Approve Candidate Characteristics",
"primary_class": "review",
"attributes": ["curation", "approval", "audit"],
"expected_features": [
{
"name": "Edit, reject, merge, and relink candidate graph entries",
"primary_class": "api",
"source_paths": [
"src/repo_registry/core/service.py",
"src/repo_registry/web_api/app.py",
"src/repo_registry/web_ui/views.py",
"tests/test_registry_service.py"
]
},
{
"name": "Publish approved characteristic maps after review",
"primary_class": "storage",
"source_paths": [
"src/repo_registry/core/service.py",
"src/repo_registry/storage/sqlite.py"
]
},
{
"name": "Record review decisions and expectation gaps",
"primary_class": "audit",
"source_paths": [
"src/repo_registry/core/service.py",
"src/repo_registry/web_api/schemas.py"
]
}
]
},
{
"name": "Search Compare And Export Approved Profiles",
"primary_class": "discovery",
"attributes": ["search", "comparison", "export"],
"expected_features": [
{
"name": "Search approved abilities, capabilities, features, and evidence",
"primary_class": "api",
"source_paths": [
"src/repo_registry/core/service.py",
"src/repo_registry/semantic/embeddings.py",
"tests/test_registry_service.py"
]
},
{
"name": "Compare repositories and identify capability gaps",
"primary_class": "api",
"source_paths": [
"src/repo_registry/core/service.py",
"src/repo_registry/web_api/app.py"
]
},
{
"name": "Export repository profiles",
"primary_class": "api",
"source_paths": [
"src/repo_registry/web_api/app.py",
"docs/api-contract.md"
]
}
]
},
{
"name": "Generate And Maintain SCOPE.md",
"primary_class": "scope-generation",
"attributes": ["scope-md", "diff", "validation"],
"expected_features": [
{
"name": "Render SCOPE.md from approved characteristics",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/scope/generator.py",
"tests/test_scope_generator.py",
"docs/scope-md-spec.md"
]
},
{
"name": "Diff, validate, and write scope files",
"primary_class": "api",
"source_paths": [
"src/repo_registry/scope/validator.py",
"src/repo_registry/web_api/app.py",
"src/repo_registry/web_ui/views.py"
]
}
]
},
{
"name": "Explore Dependency And Impact Graphs",
"primary_class": "dependency-analysis",
"attributes": ["graph", "impact", "visualization"],
"expected_features": [
{
"name": "Model dependencies between facts, evidence, features, capabilities, abilities, and scope",
"primary_class": "backend",
"source_paths": [
"src/repo_registry/core/service.py",
"docs/dependency-aware-scope-propagation.md",
"docs/dependency-visualization-exploration.md"
]
},
{
"name": "Render dependency graph views and profiles",
"primary_class": "ui",
"source_paths": [
"src/repo_registry/web_ui/views.py",
"tests/test_web_api.py"
]
}
]
},
{
"name": "Provide Scope Context To Downstream Agents",
"primary_class": "coordination",
"attributes": ["activity-core", "api-contract"],
"expected_features": [
{
"name": "Return compact JSON scope context by repository slug",
"primary_class": "api",
"source_paths": [
"src/repo_registry/web_api/app.py",
"docs/schemas/repo-scope-context-response.json",
"tests/test_scope_context_api.py"
]
}
]
}
]
},
"forbidden_native_capabilities": [
{
"name": "Route LLM Requests Across Providers",
"reason": "repo-scoping may use llm-connect as optional extraction infrastructure, but provider routing is not a native repo-scoping product capability.",
"allowed_only_if": "Future product intent and public implementation explicitly add provider routing as repo-scoping-owned behavior."
}
],
"non_native_context": [
{
"name": "LLM provider names in scanner, normalization, schemas, tests, fixtures, docs, or workplans",
"classification": "scanner-rule-or-fixture-context",
"expected_handling": "May support scanner behavior facts or test coverage, but must not become native capability truth."
},
{
"name": "llm-connect integration",
"classification": "optional dependency / adapter consumer",
"expected_handling": "May appear as optional extraction infrastructure, not as repo-scoping-owned provider routing."
},
{
"name": "SCOPE.md content",
"classification": "derived scope",
"expected_handling": "Can be comparison or bootstrap context, not primary evidence for regenerating the same characteristic model."
}
],
"comparison_rules": {
"must_have_capability_names": [
"Register And Track Repositories",
"Scan Repositories Into Observed Facts",
"Index Source Content With Provenance",
"Generate Reviewable Candidate Characteristics",
"Review And Approve Candidate Characteristics",
"Search Compare And Export Approved Profiles",
"Generate And Maintain SCOPE.md",
"Explore Dependency And Impact Graphs",
"Provide Scope Context To Downstream Agents"
],
"must_not_have_native_capability_names": [
"Route LLM Requests Across Providers"
],
"known_regression_ids": [
"RREG-SELF-REG-001",
"RREG-SELF-REG-002",
"RREG-SELF-REG-003"
]
}
}

View File

@@ -0,0 +1,112 @@
import json
from pathlib import Path
# Repository root: one directory above the directory that holds this module.
ROOT = Path(__file__).resolve().parents[1]

# JSON Schema describing an immutable assessment-run artifact.
SCHEMA_PATH = ROOT.joinpath("docs", "schemas", "self-scoping-assessment.schema.json")

# Known-bad self-analysis artifact kept as a negative regression seed.
KNOWN_BAD_PATH = ROOT.joinpath(
    "docs",
    "self-scoping",
    "assessments",
    "repo-scoping-known-bad-2026-05-15-run-39.json",
)

# Curated golden profile for repo-scoping itself.
GOLDEN_PROFILE_PATH = ROOT.joinpath(
    "docs",
    "self-scoping",
    "golden",
    "repo-scoping-golden-profile.v1.json",
)
def load_json(path: Path) -> dict:
    """Read the UTF-8 encoded file at ``path`` and return its decoded JSON content."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
def test_self_scoping_assessment_schema_requires_release_binding_metadata():
    """The schema must mark the artifact sections and engine-identity fields as required.

    It also pins the exact list of allowed ``release_binding_status`` values.
    """
    schema = load_json(SCHEMA_PATH)
    top_level_required = set(schema["required"])
    engine_identity_required = set(schema["$defs"]["engineIdentity"]["required"])

    artifact_sections = {
        "target_repository",
        "engine_identity",
        "execution",
        "assessment",
        "fact_summary",
        "generated_tree",
        "known_regression_patterns",
    }
    binding_fields = {
        "repo_scoping_version",
        "engine_commit",
        "engine_release",
        "engine_dirty_state",
        "scanner_version",
        "candidate_generator_version",
        "quality_criteria_version",
        "prompt_version",
        "release_binding_status",
    }

    assert artifact_sections.issubset(top_level_required)
    assert binding_fields.issubset(engine_identity_required)

    status_schema = schema["$defs"]["engineIdentity"]["properties"]["release_binding_status"]
    assert status_schema["enum"] == ["complete", "historical_incomplete", "unbound"]
def test_known_bad_self_scoping_artifact_captures_rejected_regression_seed():
    """The known-bad artifact must stay a rejected negative seed with its key evidence.

    Checks its identity fields, the incomplete release binding, the false
    capability in the generated tree, the three regression pattern ids, and
    the recorded ``llm_provider`` fact count.
    """
    artifact = load_json(KNOWN_BAD_PATH)

    assert artifact["schema_version"] == "self-scoping-assessment/v1"
    assert artifact["artifact_id"] == "repo-scoping-known-bad-2026-05-15-run-39"
    assert artifact["target_repository"]["repo_slug"] == "repo-scoping"
    assert artifact["execution"]["analysis_run_id"] == 39
    assert artifact["assessment"]["role"] == "negative_regression_seed"
    assert artifact["assessment"]["outcome"] == "rejected"
    binding_status = artifact["engine_identity"]["release_binding_status"]
    assert binding_status == "historical_incomplete"

    # Flatten the ability -> capability tree into the set of capability names.
    capability_names = set()
    for ability in artifact["generated_tree"]["abilities"]:
        capability_names.update(entry["name"] for entry in ability["capabilities"])

    regression_ids = set()
    for pattern in artifact["known_regression_patterns"]:
        regression_ids.add(pattern["id"])

    assert "Route LLM Requests Across Providers" in capability_names
    expected_ids = {"RREG-SELF-REG-001", "RREG-SELF-REG-002", "RREG-SELF-REG-003"}
    assert expected_ids.issubset(regression_ids)
    assert artifact["fact_summary"]["counts_by_kind"]["llm_provider"] == 41
def test_golden_profile_names_expected_native_capabilities_and_forbidden_false_positive():
    """The golden profile must list the native capabilities and forbid the false positive.

    The profile stores capability names twice: as ``ability.expected_capabilities``
    entries and as ``comparison_rules.must_have_capability_names``. The final
    assertion guards against the two lists drifting apart.
    """
    profile = load_json(GOLDEN_PROFILE_PATH)
    expected_capability_names = {
        capability["name"]
        for capability in profile["ability"]["expected_capabilities"]
    }
    forbidden_names = {
        capability["name"] for capability in profile["forbidden_native_capabilities"]
    }
    assert profile["schema_version"] == "self-scoping-golden-profile/v1"
    assert profile["repo_slug"] == "repo-scoping"
    assert {
        "Register And Track Repositories",
        "Scan Repositories Into Observed Facts",
        "Index Source Content With Provenance",
        "Generate Reviewable Candidate Characteristics",
        "Review And Approve Candidate Characteristics",
        "Search Compare And Export Approved Profiles",
        "Generate And Maintain SCOPE.md",
        "Explore Dependency And Impact Graphs",
        "Provide Scope Context To Downstream Agents",
    } <= expected_capability_names
    assert "Route LLM Requests Across Providers" in forbidden_names
    assert profile["comparison_rules"]["must_not_have_native_capability_names"] == [
        "Route LLM Requests Across Providers"
    ]
    # Every capability a comparison rule demands must be defined by the profile;
    # a name present only in the rules could never be satisfied.
    assert (
        set(profile["comparison_rules"]["must_have_capability_names"])
        <= expected_capability_names
    )

View File

@@ -0,0 +1,258 @@
---
id: RREG-WP-0013
type: workplan
title: "Self-Scoping Baseline Evaluation"
domain: capabilities
repo: repo-scoping
status: active
owner: codex
topic_slug: foerster-capabilities
created: "2026-05-15"
updated: "2026-05-15"
state_hub_workstream_id: "1c740db0-1999-478b-b3e3-c0fdfec1e9dd"
---
# Self-Scoping Baseline Evaluation
repo-scoping should become a self-improving infrastructure: every meaningful
change to the scoping engine should be testable against a known baseline for
repo-scoping itself. The goal is not just to assert that output changed, but to
make it easy for a human or trusted agent to decide whether an old or new
result is better and preserve that assessment as signal for future engine
iterations.
The motivating failure is the 2026-05-15 self-analysis where deterministic
provider-vocabulary facts were promoted into an approved `Route LLM Requests
Across Providers` capability and the repo's native API/CLI features were
attached under that incorrect capability. Future reruns should make regressions
like that obvious, reviewable, and attributable to the exact repo-scoping
release that generated them.
## T01: Define Self-Scoping Assessment Model
```task
id: RREG-WP-0013-T01
status: done
priority: high
state_hub_task_id: "af633b76-3356-4480-8108-d996eeda5a31"
```
Define the data model for immutable self-scoping assessment runs.
Each assessment must bind together:
- The target repository identity: repo slug, source URL/path, target commit,
target branch, and dirty-state marker when applicable.
- The engine identity: repo-scoping package version, git commit, git tag or
release name when available, dirty-state marker, scanner version, candidate
generator version, quality-gate/ruleset version, schema version, and prompt
version/hash when LLM or agentic review is used.
- The execution mode: deterministic-only, LLM-assisted, agent-reviewed,
trusted-auto-review, manual-review, or mixed.
- The generated artifacts: observed fact summary, candidate graph, approved map
or proposed approval set, rejected/downgraded items, source refs, and review
notes.
- The assessment outcome: baseline, challenger, preferred, tied, rejected,
superseded, or needs-human.
Acceptance criteria:
- A documented schema exists for self-scoping assessment runs.
- Assessment runs are append-only; reruns create new records instead of
rewriting old judgements.
- Engine release binding is required before an assessment can be compared.
- Dirty working trees are visible in the assessment metadata.
## T02: Capture Current Bad Self-Run As A Regression Seed
```task
id: RREG-WP-0013-T02
status: done
priority: high
state_hub_task_id: "98258aea-65bb-4709-921f-711c6cc6ee48"
```
Import or recreate the known-bad repo-scoping self-analysis as a named
regression seed.
Known bad pattern:
- Candidate/approved capability: `Route LLM Requests Across Providers`.
- Incorrect feature attachment: repo-scoping API/CLI surfaces nested under that
LLM-provider capability.
- Incorrect evidence: scanner vocabulary, schema examples, tests, and
provider-name normalization code treated as repo-owned LLM routing behavior.
Acceptance criteria:
- The bad run can be inspected as a historical assessment artifact.
- It is clearly marked as a negative baseline, not a desired golden output.
- The failure explanation is stored next to the captured graph.
- Future comparison reports can flag when a challenger repeats the same pattern.
## T03: Create Desired Repo-Scoping Golden Profile
```task
id: RREG-WP-0013-T03
status: done
priority: high
state_hub_task_id: "f3ef1711-a115-4368-a97e-98abd1eda521"
```
Author a curated golden profile for repo-scoping itself. This should be compact
enough for comparison but expressive enough to catch hierarchy errors.
Expected native capabilities should cover at least:
- Repository registration and metadata import.
- Deterministic repository scanning into observed facts.
- Source-role and provenance-aware content indexing.
- Candidate characteristic generation from facts and content.
- Candidate review, edit, reject, merge, relink, and approval workflow.
- Approved characteristic search, comparison, export, and capability-gap
exploration.
- SCOPE.md generation, diffing, validation, and write/update flows.
- Dependency graph and characteristic impact exploration.
- Scope context API support for downstream agents such as activity-core.
Forbidden top-level/native capabilities should include:
- `Route LLM Requests Across Providers`, unless repo-scoping later genuinely
implements provider routing as a product feature rather than using
`llm-connect` as optional extraction infrastructure.
Acceptance criteria:
- The golden profile includes ability, capability, feature, and evidence
expectations with source paths.
- The profile distinguishes native utility from dependencies, fixtures, test
vocabulary, schema examples, and optional LLM extraction infrastructure.
- The profile is stored in a stable, reviewable fixture location.
- The profile can evolve through explicit assessment decisions.
Implementation note 2026-05-15: added
`docs/schemas/self-scoping-assessment.schema.json`,
`docs/self-scoping/assessments/repo-scoping-known-bad-2026-05-15-run-39.json`,
`docs/self-scoping/golden/repo-scoping-golden-profile.v1.json`, and
`tests/test_self_scoping_artifacts.py`. The known-bad artifact is marked as a
negative regression seed with `historical_incomplete` release binding because
the original analysis run did not record the engine commit.
## T04: Export Assessment Artifacts From Analysis Runs
```task
id: RREG-WP-0013-T04
status: todo
priority: high
state_hub_task_id: "51e01d45-7574-4c97-994d-dabb2bcf9a00"
```
Add a CLI and/or API workflow that exports a completed analysis run as a
self-scoping assessment artifact.
Acceptance criteria:
- Export includes repository metadata, analysis run metadata, engine identity,
candidate graph, observed fact summary, content chunk summary, approved map
if present, review decisions, and quality-gate outcomes when available.
- Export format is deterministic JSON with a documented schema.
- Export refuses to mark an artifact comparable when engine identity is
incomplete.
- Export can target repo-scoping itself without requiring network access.
## T05: Compare Baseline And Challenger Runs
```task
id: RREG-WP-0013-T05
status: todo
priority: high
state_hub_task_id: "2b71069b-6150-45f4-84a2-59f5ec1e04c0"
```
Implement comparison between an existing baseline and a later challenger run.
Comparison should report:
- Added, removed, renamed, and moved abilities/capabilities/features.
- Hierarchy quality changes, especially misplaced features under the wrong
capability.
- Native-utility precision: whether generated capabilities are repo-owned,
facade/adapter, dependency, tooling, fixture, schema-example, or mention-only.
- Coverage against the repo-scoping golden profile.
- Regression flags for known-bad patterns.
- Source-ref quality: whether claims cite product intent, docs, source, tests,
fixtures, examples, or generated/derived scope.
Acceptance criteria:
- Comparison output is available as both machine-readable JSON and
human-readable Markdown.
- The report makes it easy to choose "old better", "new better", "tie", or
"needs review".
- It does not require candidates to have stable database IDs across runs.
- It can compare deterministic-only and agent-reviewed runs without losing
provenance.
## T06: Add Side-By-Side Review UI
```task
id: RREG-WP-0013-T06
status: todo
priority: medium
state_hub_task_id: "16a60b7c-7e2c-4bb0-b4ab-2381289dba0b"
```
Expose baseline/challenger comparison in the curator UI.
Acceptance criteria:
- Reviewers can select two assessment artifacts for repo-scoping.
- The UI shows the two hierarchy trees side by side with moved/misplaced items
highlighted.
- Reviewers can record preference, tie, rejection, and notes.
- Review decisions are persisted as assessment outcomes, not as changes to the
underlying historical artifacts.
## T07: Add Self-Scoping Regression Command
```task
id: RREG-WP-0013-T07
status: todo
priority: medium
state_hub_task_id: "af1fcecd-686d-4592-b739-4698abc98c55"
```
Add a repeatable command for running repo-scoping against itself and comparing
the result to the active baseline.
Acceptance criteria:
- The command captures engine identity before running analysis.
- The command can run deterministic-only without LLM or agentic review.
- The command can optionally invoke agentic review when configured.
- The command emits a comparison report and exits non-zero only for explicit
CI-blocking regressions, not for ordinary "needs review" assessment outcomes.
## T08: Document Assessment Workflow
```task
id: RREG-WP-0013-T08
status: todo
priority: medium
state_hub_task_id: "30d71946-3598-4dc7-9970-c7c18126cad7"
```
Document how maintainers should use self-scoping assessment artifacts while
evolving the engine.
Acceptance criteria:
- Documentation explains baseline, challenger, preferred, tied, rejected,
superseded, and needs-human outcomes.
- Documentation explains engine release binding and why unbound output is not
comparable.
- Documentation gives examples for the known-bad LLM-provider regression and a
desired native repo-scoping profile.
- Documentation describes when to update the golden profile versus when to fix
the engine.
## Completion Criteria
- repo-scoping has an immutable, release-bound self-scoping assessment format.
- The current known-bad output is captured as a negative regression seed.
- A curated desired repo-scoping profile exists.
- Maintainers can rerun repo-scoping on itself, compare old/new results, and
record which output is better.
- Comparison results are bound to the repo-scoping release that generated them.

View File

@@ -0,0 +1,225 @@
---
id: RREG-WP-0014
type: workplan
title: "Agentic Characteristic Acceptance"
domain: capabilities
repo: repo-scoping
status: active
owner: codex
topic_slug: foerster-capabilities
created: "2026-05-15"
updated: "2026-05-15"
state_hub_workstream_id: "7feaa5b5-32d8-4b8e-b377-cbb3ddacf64a"
---
# Agentic Characteristic Acceptance
Deterministic rules should not automatically accept candidate
characteristics. Determinism is strongest at fast, source-linked observation and
at applying transparent rejection or downgrade criteria: facts, provenance,
formal quality checks, schema validation, duplicate detection, and clear
negative filters.
Acceptance is a judgement step. When automation stands in for human judgement,
it should be agentic: inspect the evidence, apply the visible quality criteria,
explain the decision, and leave a reviewable trace. Deterministic rules may
invalidate, downgrade, or require review, but they should not silently promote a
candidate into approved registry truth.
## T01: Define Acceptance Policy Boundary
```task
id: RREG-WP-0014-T01
status: todo
priority: high
state_hub_task_id: "4bc2e749-ec9e-45d4-8095-63181efb752b"
```
Write the policy boundary between deterministic gates and acceptance
judgement.
Policy principles:
- Deterministic scanners generate observed facts and source refs.
- Deterministic quality gates can reject, downgrade, merge, flag, or require
review when criteria are formally expressible.
- Deterministic quality gates cannot approve candidate characteristics.
- Human reviewers can approve.
- Trusted agentic reviewers can approve only after producing an evidence-based
rationale.
- All automated review outcomes must be inspectable and reversible.
Acceptance criteria:
- Documentation states that deterministic auto-approval is prohibited.
- Existing "trusted auto-approve" terminology is marked for replacement or
migration.
- The allowed deterministic outcomes are explicitly listed.
- The allowed agentic outcomes are explicitly listed.
## T02: Create Transparent Quality Criteria Registry
```task
id: RREG-WP-0014-T02
status: todo
priority: high
state_hub_task_id: "101998a4-8cf8-4df0-8d05-c4e2041c0cac"
```
Create a reviewable quality criteria registry for candidate characteristics.
Initial criteria should cover:
- Source-role quality: intent/docs/source/tests are stronger than fixtures,
schema examples, agent guidance, CI/tooling, dependency declarations, or
derived scope.
- Native utility: owned/facade/adapter claims require explicit product evidence;
dependency, tooling, configuration, fixture, schema-example, and mention-only
claims are not native capabilities.
- Hierarchy fit: features should support their parent capability; misplaced
API/CLI surfaces should be flagged.
- Evidence sufficiency: candidate claims need source refs that support the
actual abstraction, not just matching vocabulary.
- Circularity: generated `SCOPE.md` text cannot be primary proof for rebuilding
the same characteristic model.
- Fixture contamination: tests and expectation files can prove scanner behavior
but should not become repo-native product capability claims.
Acceptance criteria:
- Criteria are stored in a versioned, human-readable format.
- Each criterion has an identifier, description, severity, deterministic action
if applicable, and reviewer guidance.
- Criteria can be listed through CLI and/or API.
- Assessment and review records include the criteria version used.
## T03: Implement Deterministic Quality Gate Outcomes
```task
id: RREG-WP-0014-T03
status: todo
priority: high
state_hub_task_id: "d599c084-a207-4910-9d0b-578d0c50f282"
```
Apply quality criteria before any human or agentic acceptance step.
Acceptance criteria:
- Candidate abilities, capabilities, features, and evidence can carry gate
outcomes such as `pass`, `downgraded`, `rejected`, `requires_review`, and
`invalidated`.
- Rejected or invalidated candidates remain auditable with reason codes.
- Downgraded candidates remain visible but cannot be accepted without explicit
reviewer override.
- Deterministic gates never mark a candidate as approved.
- The known repo-scoping LLM-provider self-scan failure is flagged before
acceptance.
## T04: Replace Trusted Auto-Approval With Agentic Review
```task
id: RREG-WP-0014-T04
status: todo
priority: high
state_hub_task_id: "b0d29756-7460-4ffa-8d56-d94cfb34e94f"
```
Replace `trusted_auto_approve_candidate_graph` behavior with an agentic review
workflow.
Acceptance criteria:
- Existing API/CLI/UI affordances no longer present deterministic
auto-approval as a safe path.
- A configured agentic reviewer receives the candidate graph, source refs,
quality-gate outcomes, criteria version, and repository context.
- The reviewer can approve, reject, downgrade, request human review, relink,
or propose edits.
- Each agentic approval includes a rationale tied to evidence and criteria.
- If no agentic reviewer is configured, candidates remain pending review.
## T05: Add Review Decision Audit Trail
```task
id: RREG-WP-0014-T05
status: todo
priority: high
state_hub_task_id: "0d12559a-831e-40ff-bf82-85f45b763f07"
```
Extend review decisions so acceptance history is useful for later audits and
self-scoping assessments.
Acceptance criteria:
- Review decisions record reviewer type: human, agent, deterministic-gate, or
migration.
- Agentic decisions record reviewer identity/configuration, criteria version,
prompt or policy version, evidence inspected, and rationale.
- Deterministic gate decisions record rule IDs and outcomes, not approval.
- Review records distinguish "candidate accepted as-is" from "accepted after
edits/relinks".
- Existing decisions remain readable through a migration or compatibility view.
## T06: Add Human Override And Criteria Refinement Flow
```task
id: RREG-WP-0014-T06
status: todo
priority: medium
state_hub_task_id: "bcba3237-fb87-4a38-8e96-12b872d5e6a9"
```
Make quality criteria reviewable and refinable instead of hidden in code.
Acceptance criteria:
- Reviewers can inspect which criteria fired for a candidate.
- Reviewers can override a gate with a reason.
- Overrides are searchable so repeated overrides can drive criteria changes.
- Criteria changes are versioned and linked to workplans or decisions.
- The UI makes it clear when a candidate is blocked by formal criteria versus
merely awaiting judgement.
## T07: Regression Coverage For Acceptance Boundary
```task
id: RREG-WP-0014-T07
status: todo
priority: high
state_hub_task_id: "37a22c89-ded5-42dd-aaa9-ece79477fcff"
```
Add tests that lock in the new acceptance boundary.
Acceptance criteria:
- Deterministic analysis can generate facts and candidates but cannot approve
them.
- Deterministic gates can reject/downgrade/require review with reason codes.
- Agentic review can approve only with a rationale and criteria version.
- The repo-scoping self-scan LLM-provider failure is not accepted by
deterministic rules.
- Existing manual review and approval paths keep working.
## T08: Migration And Compatibility Plan
```task
id: RREG-WP-0014-T08
status: todo
priority: medium
state_hub_task_id: "3d5475f6-71a7-4ca7-aa69-573e91d1fe1e"
```
Plan the migration away from trusted deterministic auto-approval.
Acceptance criteria:
- Existing approved maps created by trusted auto-approval can be identified.
- Users can rebuild or re-review those maps without losing audit history.
- API and CLI changes are documented with compatibility notes.
- The old behavior is either removed or guarded behind an explicit deprecated
migration mode that cannot run by default.
## Completion Criteria
- Deterministic rules no longer approve candidate characteristics.
- Transparent, versioned quality criteria can reject, downgrade, invalidate, or
require review.
- Agentic review is the only automated path that can stand in for human
acceptance.
- Acceptance decisions are auditable, evidence-bound, and useful as a training
signal for future self-scoping assessments.