generated from coulomb/repo-seed
Cross repo isolation
This commit is contained in:
@@ -21,6 +21,9 @@ dev = [
|
|||||||
"pytest>=7.4",
|
"pytest>=7.4",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
repo-scoping = "repo_registry.cli:main"
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
where = ["src"]
|
where = ["src"]
|
||||||
|
|
||||||
|
|||||||
@@ -188,7 +188,7 @@ class CandidateGraphGenerator:
|
|||||||
),
|
),
|
||||||
source_refs=self._source_refs(interfaces),
|
source_refs=self._source_refs(interfaces),
|
||||||
primary_class="interface",
|
primary_class="interface",
|
||||||
attributes=self._interface_attributes(interfaces),
|
attributes=self._interface_attributes(interfaces, docs, chunks),
|
||||||
features=features,
|
features=features,
|
||||||
evidence=self._evidence(tests, examples, docs),
|
evidence=self._evidence(tests, examples, docs),
|
||||||
)
|
)
|
||||||
@@ -523,10 +523,33 @@ class CandidateGraphGenerator:
|
|||||||
attributes.append("interface")
|
attributes.append("interface")
|
||||||
return "developer-tooling", self._unique(attributes)
|
return "developer-tooling", self._unique(attributes)
|
||||||
|
|
||||||
def _interface_attributes(self, interfaces: list[ObservedFact]) -> list[str]:
|
def _interface_attributes(
|
||||||
|
self,
|
||||||
|
interfaces: list[ObservedFact],
|
||||||
|
docs: list[ObservedFact] | None = None,
|
||||||
|
chunks: list[ContentChunk] | None = None,
|
||||||
|
) -> list[str]:
|
||||||
feature_types = {self._feature_type(fact) for fact in interfaces}
|
feature_types = {self._feature_type(fact) for fact in interfaces}
|
||||||
attributes = ["api" if item == "API" else "cli" if item == "CLI" else "callable" for item in feature_types]
|
attributes = ["api" if item == "API" else "cli" if item == "CLI" else "callable" for item in feature_types]
|
||||||
return self._unique(["surface", *attributes, "utility-owned"])
|
utility = self._interface_utility_relationship(docs or [], chunks or [])
|
||||||
|
return self._unique(["surface", *attributes, f"utility-{utility}"])
|
||||||
|
|
||||||
|
def _interface_utility_relationship(
|
||||||
|
self,
|
||||||
|
docs: list[ObservedFact],
|
||||||
|
chunks: list[ContentChunk],
|
||||||
|
) -> str:
|
||||||
|
doc_paths = {fact.path for fact in docs}
|
||||||
|
text = " ".join(
|
||||||
|
chunk.text.lower()
|
||||||
|
for chunk in chunks
|
||||||
|
if chunk.path in doc_paths
|
||||||
|
and chunk.kind in {"intent", "documentation"}
|
||||||
|
and chunk.metadata.get("source_role") != "derived_scope"
|
||||||
|
)
|
||||||
|
if any(token in text for token in ("facade", "proxy", "wrapper", "wraps ")):
|
||||||
|
return "facade"
|
||||||
|
return "owned"
|
||||||
|
|
||||||
def _feature_attributes(
|
def _feature_attributes(
|
||||||
self,
|
self,
|
||||||
|
|||||||
157
src/repo_registry/cli.py
Normal file
157
src/repo_registry/cli.py
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Sequence
|
||||||
|
|
||||||
|
from repo_registry.core.models import CharacteristicRebuildResult, Repository
|
||||||
|
from repo_registry.core.service import RegistryService
|
||||||
|
from repo_registry.llm_extraction import LLMCandidateExtractor, create_llm_connect_adapter
|
||||||
|
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||||
|
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
||||||
|
from repo_registry.web_api.app import Settings
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the ``repo-scoping`` command-line parser.

    The parser requires a subcommand (stored as ``command``). The only
    subcommand is ``rebuild-characteristics``, which requires exactly one of
    ``--repo`` or ``--all`` and accepts the boolean switches and path
    overrides registered below.
    """
    root = argparse.ArgumentParser(
        prog="repo-scoping",
        description="Repository Scoping maintenance commands.",
    )
    commands = root.add_subparsers(dest="command", required=True)
    rebuild_cmd = commands.add_parser(
        "rebuild-characteristics",
        help="Rebuild candidate characteristics for one or more repositories.",
    )

    # Exactly one way of selecting the target repositories must be given.
    selection = rebuild_cmd.add_mutually_exclusive_group(required=True)
    selection.add_argument("--repo", help="Repository id or exact repository name.")
    selection.add_argument("--all", action="store_true", help="Rebuild every repository.")

    switches = (
        ("--dry-run", "Preview without clearing approved characteristics."),
        ("--no-llm", "Disable configured LLM assistance."),
        ("--trusted-auto-approve", "Run trusted auto-approval after a confirmed rebuild."),
        ("--confirm", "Confirm a destructive rebuild for selected repositories."),
        ("--confirm-all", "Confirm a destructive all-repository rebuild."),
    )
    for flag, description in switches:
        rebuild_cmd.add_argument(flag, action="store_true", help=description)

    overrides = (
        ("--database-path", "Override REPO_REGISTRY_DATABASE_PATH."),
        ("--checkout-root", "Override REPO_REGISTRY_CHECKOUT_ROOT."),
    )
    for flag, description in overrides:
        rebuild_cmd.add_argument(flag, help=description)

    return root
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: Sequence[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand.

    Returns the exit status of the subcommand handler. An unrecognised
    command is reported through ``parser.error``.
    """
    cli = build_parser()
    namespace = cli.parse_args(argv)
    if namespace.command != "rebuild-characteristics":
        cli.error(f"unknown command: {namespace.command}")
        # Fallback status should ``error`` ever return instead of exiting.
        return 2
    return rebuild_characteristics_command(namespace, cli)
|
||||||
|
|
||||||
|
|
||||||
|
def rebuild_characteristics_command(
    args: argparse.Namespace,
    parser: argparse.ArgumentParser,
) -> int:
    """Run the ``rebuild-characteristics`` subcommand.

    Unless ``--dry-run`` is set, the invocation is rejected through
    ``parser.error`` when ``--all`` is used without ``--confirm-all`` or when
    neither ``--confirm`` nor ``--confirm-all`` is present. Every selected
    repository is rebuilt, optionally auto-approved when the rebuild
    completed, and summarised with one printed line.

    Returns 0 after all selected repositories were processed.
    """
    preview_only = bool(args.dry_run)
    if not preview_only:
        if args.all and not args.confirm_all:
            parser.error("--all destructive rebuilds require --confirm-all")
        if not args.confirm and not args.confirm_all:
            parser.error("destructive rebuilds require --confirm or --confirm-all")

    registry = service_from_args(args)
    targets = selected_repositories(registry, args)
    if not targets:
        parser.error("no repositories matched the requested target")

    for target in targets:
        outcome = registry.rebuild_characteristics_from_scratch(
            target.id,
            dry_run=preview_only,
            confirm=not preview_only,
            use_llm_assistance=not args.no_llm,
        )
        auto_approve = args.trusted_auto_approve and not preview_only
        if auto_approve and outcome.analysis_run.status == "completed":
            registry.trusted_auto_approve_candidate_graph(
                target.id,
                outcome.analysis_run.id,
                notes="CLI trusted auto-approve after rebuild.",
            )
        print(rebuild_summary_line(registry, outcome, args))
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def service_from_args(args: argparse.Namespace) -> RegistryService:
    """Assemble a ``RegistryService`` from CLI arguments and ``Settings``.

    ``--database-path`` and ``--checkout-root`` take precedence over the
    corresponding ``Settings`` values. The database's parent directory is
    created if missing and the store is initialised. An LLM extractor is
    attached only when ``--no-llm`` is absent and the settings report an
    enabled LLM with a provider.
    """
    config = Settings()
    db_file = Path(args.database_path or config.database_path)
    db_file.parent.mkdir(parents=True, exist_ok=True)

    registry_store = RegistryStore(db_file)
    registry_store.initialize()

    extractor = None
    llm_wanted = not args.no_llm and config.llm_enabled and config.llm_provider
    if llm_wanted:
        extractor = LLMCandidateExtractor(
            create_llm_connect_adapter(config.llm_provider, model=config.llm_model)
        )

    ingestion = GitIngestionService(args.checkout_root or config.checkout_root)
    return RegistryService(
        registry_store,
        ingestion=ingestion,
        llm_extractor=extractor,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def selected_repositories(
    service: RegistryService,
    args: argparse.Namespace,
) -> list[Repository]:
    """Resolve the repositories targeted by ``--all`` or ``--repo``.

    With ``--all`` every repository returned by the service is selected.
    Otherwise ``--repo`` is treated as a repository id when it consists only
    of decimal digits, and as an exact repository name in every other case.

    Returns an empty list when the id is unknown or no name matches.
    """
    repositories = service.list_repositories()
    if args.all:
        return repositories

    repo = str(args.repo)
    # str.isdigit() also accepts characters such as "²" that int() cannot
    # parse; isdecimal() only accepts characters int() understands, so such
    # input falls through to the name match instead of raising ValueError.
    if repo.isdecimal():
        try:
            return [service.get_repository(int(repo))]
        except NotFoundError:
            return []
    return [repository for repository in repositories if repository.name == repo]
|
||||||
|
|
||||||
|
|
||||||
|
def rebuild_summary_line(
    service: RegistryService,
    result: CharacteristicRebuildResult,
    args: argparse.Namespace,
) -> str:
    """Format the one-line, space-separated summary of a rebuild result.

    The candidate graph is fetched only when the analysis run completed;
    ``remaining_review_queue`` counts its capabilities whose status is still
    ``"candidate"`` and is 0 otherwise.
    """
    repo = result.repository
    run = result.analysis_run

    pending = 0
    if run.status == "completed":
        graph = service.candidate_graph(repo.id, run.id)
        if graph is not None:
            for ability in graph.abilities:
                for capability in ability.capabilities:
                    if capability.status == "candidate":
                        pending += 1

    source_label = "configured" if not args.no_llm else "deterministic"
    fields = [
        f"repo={repo.id}:{repo.name}",
        f"latest_analysis_run={run.id}",
        f"candidate_source={source_label}",
        f"dry_run={result.dry_run}",
        f"cleared_approved={result.cleared_approved}",
        f"approved_superseded={result.previous_counts}",
        f"candidates={result.candidate_counts}",
        f"remaining_review_queue={pending}",
    ]
    return " ".join(fields)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
@@ -215,7 +215,7 @@ class RegistryService:
|
|||||||
candidate_source = "deterministic"
|
candidate_source = "deterministic"
|
||||||
candidates = normalize_candidate_drafts(candidates)
|
candidates = normalize_candidate_drafts(candidates)
|
||||||
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
|
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
|
||||||
if candidate_source == "llm":
|
if "llm" in candidate_source:
|
||||||
log_operation(
|
log_operation(
|
||||||
"llm_extraction_used",
|
"llm_extraction_used",
|
||||||
repository_id=repository_id,
|
repository_id=repository_id,
|
||||||
@@ -226,7 +226,10 @@ class RegistryService:
|
|||||||
repository_id,
|
repository_id,
|
||||||
completed_run.id,
|
completed_run.id,
|
||||||
action="llm_extraction_used",
|
action="llm_extraction_used",
|
||||||
notes=f"Generated {len(candidates)} candidate ability draft(s).",
|
notes=(
|
||||||
|
f"Generated {len(candidates)} candidate ability draft(s) "
|
||||||
|
f"from {candidate_source} candidate generation."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
if trusted_auto_approve:
|
if trusted_auto_approve:
|
||||||
self.trusted_auto_approve_candidate_graph(
|
self.trusted_auto_approve_candidate_graph(
|
||||||
@@ -260,11 +263,13 @@ class RegistryService:
|
|||||||
*,
|
*,
|
||||||
use_llm_assistance: bool = True,
|
use_llm_assistance: bool = True,
|
||||||
):
|
):
|
||||||
|
deterministic = self.candidate_generator.generate(repository, facts, chunks)
|
||||||
if use_llm_assistance and self.llm_extractor is not None:
|
if use_llm_assistance and self.llm_extractor is not None:
|
||||||
extracted = self.llm_extractor.extract(repository, chunks)
|
extracted = self.llm_extractor.extract(repository, chunks)
|
||||||
if extracted:
|
if extracted:
|
||||||
return self.llm_mapper.map(extracted, facts, chunks), "llm"
|
llm_candidates = self.llm_mapper.map(extracted, facts, chunks)
|
||||||
return self.candidate_generator.generate(repository, facts, chunks), "deterministic"
|
return llm_candidates + deterministic, "llm+deterministic"
|
||||||
|
return deterministic, "deterministic"
|
||||||
|
|
||||||
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
|
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
|
||||||
return self.store.list_analysis_runs(repository_id)
|
return self.store.list_analysis_runs(repository_id)
|
||||||
|
|||||||
@@ -75,12 +75,20 @@ class LLMCandidateExtractor:
|
|||||||
chunk_text = "\n\n".join(
|
chunk_text = "\n\n".join(
|
||||||
(
|
(
|
||||||
f"Source: {chunk.path}:{chunk.start_line}-{chunk.end_line} "
|
f"Source: {chunk.path}:{chunk.start_line}-{chunk.end_line} "
|
||||||
f"({chunk.kind})\n{chunk.text}"
|
f"({chunk.kind}; source_role={self._source_role(chunk)})\n{chunk.text}"
|
||||||
)
|
)
|
||||||
for chunk in chunks[:12]
|
for chunk in self._prompt_chunks(chunks)
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
"Extract a conservative, source-linked repository ability map.\n"
|
"Extract a conservative, source-linked repository ability map.\n"
|
||||||
|
"Use original repository utility only: capabilities the repository "
|
||||||
|
"owns, intentionally exposes as a facade, or implements as an adapter.\n"
|
||||||
|
"Prefer source_role=intent_summary, product_documentation, "
|
||||||
|
"implementation_source, and test_evidence. Do not use SCOPE.md or "
|
||||||
|
"source_role=derived_scope as primary evidence; it is a derived prior "
|
||||||
|
"registry view and may be stale. Ignore agent guidance, CI/tooling, "
|
||||||
|
"dependency-only, and mention-only context unless owned product "
|
||||||
|
"evidence supports the same claim.\n"
|
||||||
"Return strict JSON only with this shape:\n"
|
"Return strict JSON only with this shape:\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
' "abilities": [\n'
|
' "abilities": [\n'
|
||||||
@@ -108,6 +116,46 @@ class LLMCandidateExtractor:
|
|||||||
f"{chunk_text}\n"
|
f"{chunk_text}\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _prompt_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
|
||||||
|
promptable = [
|
||||||
|
chunk
|
||||||
|
for chunk in chunks
|
||||||
|
if self._source_role(chunk) not in {"agent_guidance", "derived_scope"}
|
||||||
|
]
|
||||||
|
return sorted(
|
||||||
|
promptable,
|
||||||
|
key=lambda chunk: (
|
||||||
|
self._source_role_priority(self._source_role(chunk)),
|
||||||
|
chunk.path,
|
||||||
|
chunk.start_line,
|
||||||
|
),
|
||||||
|
)[:12]
|
||||||
|
|
||||||
|
def _source_role(self, chunk: ContentChunk) -> str:
|
||||||
|
role = chunk.metadata.get("source_role")
|
||||||
|
if isinstance(role, str) and role:
|
||||||
|
return role
|
||||||
|
path = chunk.path.lower()
|
||||||
|
if path.endswith("intent.md"):
|
||||||
|
return "intent_summary"
|
||||||
|
if path.endswith("scope.md"):
|
||||||
|
return "derived_scope"
|
||||||
|
if path.endswith(("agents.md", "claude.md")) or "/.claude/" in path:
|
||||||
|
return "agent_guidance"
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _source_role_priority(self, source_role: str) -> int:
|
||||||
|
priorities = {
|
||||||
|
"intent_summary": 0,
|
||||||
|
"product_documentation": 1,
|
||||||
|
"implementation_source": 2,
|
||||||
|
"test_evidence": 3,
|
||||||
|
"configuration": 4,
|
||||||
|
"dependency_declaration": 5,
|
||||||
|
"ci_tooling": 6,
|
||||||
|
}
|
||||||
|
return priorities.get(source_role, 7)
|
||||||
|
|
||||||
def parse_response(self, content: str) -> list[ExtractedAbility]:
|
def parse_response(self, content: str) -> list[ExtractedAbility]:
|
||||||
try:
|
try:
|
||||||
payload = json.loads(self._json_text(content))
|
payload = json.loads(self._json_text(content))
|
||||||
|
|||||||
@@ -73,3 +73,93 @@ def write_empty_repo(root: Path) -> Path:
|
|||||||
repo = root / "empty-repo"
|
repo = root / "empty-repo"
|
||||||
repo.mkdir()
|
repo.mkdir()
|
||||||
return repo
|
return repo
|
||||||
|
|
||||||
|
|
||||||
|
def write_key_cape_like_repo(root: Path) -> Path:
    """Create the ``key-cape-like`` fixture repository under ``root``.

    Writes INTENT.md, SCOPE.md, README.md and CLAUDE.md plus a single
    adapter module at ``src/internal/adapters/oidc.py`` and returns the
    repository directory.
    """
    repo = root / "key-cape-like"
    repo.mkdir()

    documents = {
        "INTENT.md": (
            "# INTENT\n\n"
            "Provide lightweight IAM profile enforcement for small deployments.\n\n"
            "## Intended Capabilities\n\n"
            "- Enforce OIDC PKCE profiles.\n"
            "- Validate LDAP schema migrations.\n"
            "- Run migration tooling for identity data.\n"
        ),
        "SCOPE.md": (
            "# SCOPE\n\n"
            "Old polluted scope mentions routing LLM provider requests.\n"
        ),
        "README.md": (
            "# KeyCape\n\n"
            "Lightweight IAM service with OIDC profile enforcement and LDAP schema "
            "validation. Backend adapters live under src/internal/adapters.\n"
            "See CLAUDE.md for agent workflow.\n"
        ),
        "CLAUDE.md": (
            "# CLAUDE.md\n\n"
            "Guidance for Claude Code when working in this repository.\n"
        ),
    }
    for filename, body in documents.items():
        (repo / filename).write_text(body, encoding="utf-8")

    adapters = repo / "src" / "internal" / "adapters"
    adapters.mkdir(parents=True)
    (adapters / "oidc.py").write_text(
        "def enforce_pkce_profile(client):\n"
        " return client.require_pkce\n",
        encoding="utf-8",
    )
    return repo
|
||||||
|
|
||||||
|
|
||||||
|
def write_llm_connect_like_repo(root: Path) -> Path:
    """Create the ``llm-connect-like`` fixture repository under ``root``.

    Writes a README, an ``.env.example`` with provider API key names and a
    ``providers.py`` module, then returns the repository directory.
    """
    repo = root / "llm-connect-like"
    repo.mkdir()

    contents = (
        ("README.md", "# LLM Connect\n\nSupports OpenRouter and Claude fallback for prompts.\n"),
        (".env.example", "OPENROUTER_API_KEY=\nANTHROPIC_API_KEY=\n"),
        (
            "providers.py",
            "provider_registry = {'openrouter': OpenRouterAdapter, 'anthropic': ClaudeAdapter}\n"
            "fallback_provider = 'claude'\n",
        ),
    )
    for filename, body in contents:
        (repo / filename).write_text(body, encoding="utf-8")
    return repo
|
||||||
|
|
||||||
|
|
||||||
|
def write_facade_repo(root: Path) -> Path:
    """Create the ``facade-repo`` fixture repository under ``root``.

    Writes a README describing an HTTP facade and an ``app.py`` declaring a
    ``POST /classify`` route, then returns the repository directory.
    """
    repo = root / "facade-repo"
    repo.mkdir()

    readme_text = (
        "# Mail Facade\n\n"
        "Provides a public HTTP facade that wraps the upstream mail classifier.\n"
    )
    app_text = (
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.post("/classify")\n'
        "def classify():\n"
        " return {}\n"
    )
    for filename, body in (("README.md", readme_text), ("app.py", app_text)):
        (repo / filename).write_text(body, encoding="utf-8")
    return repo
|
||||||
|
|
||||||
|
|
||||||
|
def write_dependency_only_repo(root: Path) -> Path:
    """Create the ``dependency-only`` fixture repository under ``root``.

    Writes a README and a ``requirements.txt`` listing two client libraries,
    then returns the repository directory.
    """
    repo = root / "dependency-only"
    repo.mkdir()

    contents = (
        ("README.md", "# Dependency Only\n\nUses OpenRouter during experiments but exposes no API.\n"),
        ("requirements.txt", "openai\nanthropic\n"),
    )
    for filename, body in contents:
        (repo / filename).write_text(body, encoding="utf-8")
    return repo
|
||||||
|
|||||||
100
tests/test_cli.py
Normal file
100
tests/test_cli.py
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from repo_registry.cli import main
|
||||||
|
from repo_registry.core.service import RegistryService
|
||||||
|
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||||
|
from repo_registry.storage.sqlite import RegistryStore
|
||||||
|
|
||||||
|
|
||||||
|
def make_service(tmp_path):
    """Build a ``RegistryService`` backed by a SQLite file under ``tmp_path``.

    The store lives at ``registry.sqlite3`` and checkouts go to
    ``checkouts``, the same locations the CLI tests pass on the command line.
    """
    registry_store = RegistryStore(tmp_path / "registry.sqlite3")
    registry_store.initialize()
    checkout_service = GitIngestionService(tmp_path / "checkouts")
    return RegistryService(registry_store, ingestion=checkout_service)
|
||||||
|
|
||||||
|
|
||||||
|
def write_repo(tmp_path):
    """Create a minimal source repository under ``tmp_path`` and return it.

    The repository holds a README and an ``app.py`` declaring a
    ``GET /health`` route.
    """
    source = tmp_path / "repo"
    source.mkdir()
    contents = (
        ("README.md", "# CLI Rebuild\nReports health.\n"),
        ("app.py", '@app.get("/health")\ndef health():\n return {}\n'),
    )
    for filename, body in contents:
        (source / filename).write_text(body, encoding="utf-8")
    return source
|
||||||
|
|
||||||
|
|
||||||
|
def approved_repository(tmp_path):
    """Register, analyse and approve a fixture repository.

    Analysis runs without LLM assistance and the resulting candidate graph is
    approved. Returns the ``(service, repository)`` pair.
    """
    service = make_service(tmp_path)
    repo_dir = write_repo(tmp_path)
    repository = service.register_repository(name="CLI Rebuild", url=str(repo_dir))
    analysis = service.analyze_repository(repository.id, use_llm_assistance=False)
    run_id = analysis.analysis_run.id
    service.approve_candidate_graph(repository.id, run_id)
    return service, repository
|
||||||
|
|
||||||
|
|
||||||
|
def test_rebuild_cli_dry_run_preserves_approved_characteristics(tmp_path, capsys):
    """A ``--dry-run`` rebuild prints a preview and keeps approved abilities."""
    service, repository = approved_repository(tmp_path)
    argv = [
        "rebuild-characteristics",
        "--repo",
        str(repository.id),
        "--dry-run",
        "--no-llm",
        "--database-path",
        str(tmp_path / "registry.sqlite3"),
        "--checkout-root",
        str(tmp_path / "checkouts"),
    ]

    exit_code = main(argv)

    output = capsys.readouterr().out
    assert exit_code == 0
    expected_fragments = (
        "repo=1:CLI Rebuild",
        "latest_analysis_run=2",
        "candidate_source=deterministic",
        "dry_run=True",
        "cleared_approved=False",
    )
    for fragment in expected_fragments:
        assert fragment in output
    # The previously approved ability map must still be populated.
    assert service.ability_map(repository.id).abilities
|
||||||
|
|
||||||
|
|
||||||
|
def test_rebuild_cli_confirmed_single_repo_clears_approved_characteristics(tmp_path, capsys):
    """A confirmed single-repository rebuild empties the approved ability map."""
    _service, repository = approved_repository(tmp_path)
    argv = [
        "rebuild-characteristics",
        "--repo",
        str(repository.id),
        "--no-llm",
        "--confirm",
        "--database-path",
        str(tmp_path / "registry.sqlite3"),
        "--checkout-root",
        str(tmp_path / "checkouts"),
    ]

    exit_code = main(argv)

    # Use a fresh service over the same database to inspect the result.
    service = make_service(tmp_path)
    output = capsys.readouterr().out
    assert exit_code == 0
    for fragment in ("dry_run=False", "cleared_approved=True"):
        assert fragment in output
    assert service.ability_map(repository.id).abilities == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_rebuild_cli_refuses_destructive_all_without_confirm_all(tmp_path):
    """``--all`` with only ``--confirm`` exits with status 2."""
    approved_repository(tmp_path)
    argv = [
        "rebuild-characteristics",
        "--all",
        "--confirm",
        "--database-path",
        str(tmp_path / "registry.sqlite3"),
        "--checkout-root",
        str(tmp_path / "checkouts"),
    ]

    with pytest.raises(SystemExit) as exc:
        main(argv)

    assert exc.value.code == 2
|
||||||
@@ -50,6 +50,58 @@ def chunk():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_prompt_filters_derived_scope_and_labels_source_roles():
    """The prompt omits SCOPE.md and CLAUDE.md chunks and labels source roles."""
    adapter = FakeAdapter('{"abilities": []}')
    extractor = LLMCandidateExtractor(adapter)

    def make_chunk(chunk_id, path, kind, end_line, text, source_role):
        # All chunks share the same repository/run/snapshot and start line.
        return ContentChunk(
            id=chunk_id,
            repository_id=1,
            analysis_run_id=1,
            snapshot_id=1,
            path=path,
            kind=kind,
            start_line=1,
            end_line=end_line,
            text=text,
            metadata={"source_role": source_role},
        )

    chunks = [
        make_chunk(1, "SCOPE.md", "scope", 3, "# SCOPE\n\nOld approved LLM routing entry.", "derived_scope"),
        make_chunk(2, "INTENT.md", "intent", 3, "# INTENT\n\nProvide lightweight IAM.", "intent_summary"),
        make_chunk(3, "CLAUDE.md", "documentation", 2, "# CLAUDE\n\nAgent guidance.", "agent_guidance"),
    ]

    extractor.extract(repository(), chunks)

    prompt = adapter.last_prompt
    assert "Source: INTENT.md" in prompt
    assert "source_role=intent_summary" in prompt
    assert "Source: SCOPE.md" not in prompt
    assert "Old approved LLM routing entry" not in prompt
    assert "Source: CLAUDE.md" not in prompt
    assert "Do not use SCOPE.md" in prompt
|
||||||
|
|
||||||
|
|
||||||
def test_llm_candidate_extractor_parses_structured_response():
|
def test_llm_candidate_extractor_parses_structured_response():
|
||||||
adapter = FakeAdapter(
|
adapter = FakeAdapter(
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -9,8 +9,12 @@ from repo_registry.repo_ingestion.git import GitIngestionService
|
|||||||
from repo_registry.semantic import HashingEmbeddingProvider
|
from repo_registry.semantic import HashingEmbeddingProvider
|
||||||
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
||||||
from tests.fixtures import (
|
from tests.fixtures import (
|
||||||
|
write_dependency_only_repo,
|
||||||
write_empty_repo,
|
write_empty_repo,
|
||||||
|
write_facade_repo,
|
||||||
write_javascript_typescript_package_repo,
|
write_javascript_typescript_package_repo,
|
||||||
|
write_key_cape_like_repo,
|
||||||
|
write_llm_connect_like_repo,
|
||||||
write_misleading_docs_repo,
|
write_misleading_docs_repo,
|
||||||
write_python_cli_repo,
|
write_python_cli_repo,
|
||||||
write_readme_only_repo,
|
write_readme_only_repo,
|
||||||
@@ -396,6 +400,80 @@ def test_fixture_breadth_misleading_docs_do_not_become_approved_truth(tmp_path):
|
|||||||
assert ability_map.abilities == []
|
assert ability_map.abilities == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_regression_key_cape_like_repo_centers_iam_not_llm_provider_routing(tmp_path):
    """Deterministic analysis yields the IAM capabilities, not LLM routing."""
    source = write_key_cape_like_repo(tmp_path)
    service = make_service(tmp_path)
    repository = service.register_repository(name="KeyCape Like", url=str(source))

    summary = service.analyze_repository(repository.id, use_llm_assistance=False)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    capability_names = set()
    for ability in graph.abilities:
        for capability in ability.capabilities:
            capability_names.add(capability.name)

    for expected in (
        "Enforce OIDC PKCE Profiles",
        "Validate LDAP Schema Migrations",
        "Run Migration Tooling For Identity Data",
    ):
        assert expected in capability_names
    assert "Route LLM Requests Across Providers" not in capability_names

    # The CLAUDE.md agent-guidance file must not surface as an LLM provider fact.
    facts = {(fact.kind, fact.name, fact.path) for fact in summary.facts}
    assert ("llm_provider", "Claude", "CLAUDE.md") not in facts
|
||||||
|
|
||||||
|
|
||||||
|
def test_regression_llm_connect_like_repo_still_promotes_provider_routing(tmp_path):
    """A repository that owns provider routing still gets that capability."""
    source = write_llm_connect_like_repo(tmp_path)
    service = make_service(tmp_path)
    repository = service.register_repository(name="LLM Connect Like", url=str(source))

    summary = service.analyze_repository(repository.id, use_llm_assistance=False)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    routing_candidates = (
        capability
        for ability in graph.abilities
        for capability in ability.capabilities
        if capability.name == "Route LLM Requests Across Providers"
    )
    capability = next(routing_candidates)

    required = {"utility-adapter", "llm-provider", "openrouter", "claude"}
    assert required <= set(capability.attributes)
|
||||||
|
|
||||||
|
|
||||||
|
def test_regression_facade_repo_promotes_public_wrapper_as_facade(tmp_path):
    """A README describing a facade marks the interface as ``utility-facade``."""
    source = write_facade_repo(tmp_path)
    service = make_service(tmp_path)
    repository = service.register_repository(name="Mail Facade", url=str(source))

    summary = service.analyze_repository(repository.id, use_llm_assistance=False)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    first_ability = graph.abilities[0]
    capability = first_ability.capabilities[0]
    feature_names = {feature.name for feature in capability.features}
    assert capability.name == "Expose Repository Interface"
    assert "utility-facade" in capability.attributes
    assert "POST /classify" in feature_names
|
||||||
|
|
||||||
|
|
||||||
|
def test_regression_dependency_only_repo_keeps_libraries_as_context(tmp_path):
    """Dependencies alone produce only the structural-context capability."""
    source = write_dependency_only_repo(tmp_path)
    service = make_service(tmp_path)
    repository = service.register_repository(name="Dependency Only", url=str(source))

    summary = service.analyze_repository(repository.id, use_llm_assistance=False)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

    capability_names = set()
    for ability in graph.abilities:
        for capability in ability.capabilities:
            capability_names.add(capability.name)

    assert "Route LLM Requests Across Providers" not in capability_names
    assert capability_names == {"Describe Repository Structure"}

    structure = graph.abilities[0].capabilities[0]
    for attribute in ("utility-dependency", "review-required-structural-context"):
        assert attribute in structure.attributes
|
||||||
|
|
||||||
|
|
||||||
def test_fixture_breadth_empty_repo_produces_no_candidate_claims(tmp_path):
|
def test_fixture_breadth_empty_repo_produces_no_candidate_claims(tmp_path):
|
||||||
source = write_empty_repo(tmp_path)
|
source = write_empty_repo(tmp_path)
|
||||||
service = make_service(tmp_path)
|
service = make_service(tmp_path)
|
||||||
@@ -622,7 +700,110 @@ def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
|
|||||||
assert graph.abilities[0].capabilities[0].name == "Classify Incoming Email"
|
assert graph.abilities[0].capabilities[0].name == "Classify Incoming Email"
|
||||||
assert graph.abilities[0].source_refs[0].path == "README.md"
|
assert graph.abilities[0].source_refs[0].path == "README.md"
|
||||||
assert decisions[0].action == "llm_extraction_used"
|
assert decisions[0].action == "llm_extraction_used"
|
||||||
assert "1 candidate ability" in decisions[0].notes
|
assert "llm+deterministic candidate generation" in decisions[0].notes
|
||||||
|
assert {ability.name for ability in graph.abilities} >= {
|
||||||
|
"Business Email Routing",
|
||||||
|
"Route Incoming Customer Email",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_repository_keeps_deterministic_candidates_when_llm_returns_stale_entries(tmp_path):
|
||||||
|
source = tmp_path / "repo"
|
||||||
|
source.mkdir()
|
||||||
|
(source / "INTENT.md").write_text(
|
||||||
|
"# INTENT\n\n"
|
||||||
|
"Provide lightweight IAM.\n\n"
|
||||||
|
"## Intended Capabilities\n\n"
|
||||||
|
"- Enforce OIDC PKCE profiles.\n",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
(source / "SCOPE.md").write_text(
|
||||||
|
"# SCOPE\n\nOld approved entry: route LLM provider requests.\n",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
store = RegistryStore(tmp_path / "registry.sqlite3")
|
||||||
|
store.initialize()
|
||||||
|
extractor = FakeLLMExtractor(
|
||||||
|
[
|
||||||
|
ExtractedAbility(
|
||||||
|
name="Old LLM Routing",
|
||||||
|
description="Stale prior scope claim.",
|
||||||
|
source_paths=["SCOPE.md"],
|
||||||
|
capabilities=[
|
||||||
|
ExtractedCapability(
|
||||||
|
name="Route LLM Provider Requests",
|
||||||
|
description="Old scope reuse.",
|
||||||
|
source_paths=["SCOPE.md"],
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
service = RegistryService(
|
||||||
|
store,
|
||||||
|
ingestion=GitIngestionService(tmp_path / "checkouts"),
|
||||||
|
llm_extractor=extractor,
|
||||||
|
)
|
||||||
|
repository = service.register_repository(name="KeyCape Like", url=str(source))
|
||||||
|
|
||||||
|
summary = service.analyze_repository(repository.id)
|
||||||
|
graph = service.candidate_graph(repository.id, summary.analysis_run.id)
|
||||||
|
decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)
|
||||||
|
|
||||||
|
capability_names = {
|
||||||
|
capability.name
|
||||||
|
for ability in graph.abilities
|
||||||
|
for capability in ability.capabilities
|
||||||
|
}
|
||||||
|
assert "Route LLM Provider Requests" in capability_names
|
||||||
|
assert "Enforce OIDC PKCE Profiles" in capability_names
|
||||||
|
assert decisions[0].action == "llm_extraction_used"
|
||||||
|
assert "llm+deterministic candidate generation" in decisions[0].notes
|
||||||
|
|
||||||
|
|
||||||
|
def test_analysis_isolation_between_repositories_with_stale_approved_data(tmp_path):
|
||||||
|
poisoned_source = write_llm_connect_like_repo(tmp_path)
|
||||||
|
target_source = write_key_cape_like_repo(tmp_path)
|
||||||
|
service = make_service(tmp_path)
|
||||||
|
poisoned = service.register_repository(
|
||||||
|
name="Poisoned LLM Connect",
|
||||||
|
url=str(poisoned_source),
|
||||||
|
)
|
||||||
|
target = service.register_repository(
|
||||||
|
name="Isolated KeyCape",
|
||||||
|
url=str(target_source),
|
||||||
|
)
|
||||||
|
|
||||||
|
poisoned_summary = service.analyze_repository(
|
||||||
|
poisoned.id,
|
||||||
|
use_llm_assistance=False,
|
||||||
|
)
|
||||||
|
service.approve_candidate_graph(poisoned.id, poisoned_summary.analysis_run.id)
|
||||||
|
assert any(
|
||||||
|
capability.name == "Route LLM Requests Across Providers"
|
||||||
|
for ability in service.ability_map(poisoned.id).abilities
|
||||||
|
for capability in ability.capabilities
|
||||||
|
)
|
||||||
|
|
||||||
|
target_summary = service.analyze_repository(
|
||||||
|
target.id,
|
||||||
|
use_llm_assistance=False,
|
||||||
|
)
|
||||||
|
target_graph = service.candidate_graph(target.id, target_summary.analysis_run.id)
|
||||||
|
target_facts = service.list_observed_facts(target.id, target_summary.analysis_run.id)
|
||||||
|
target_chunks = service.list_content_chunks(target.id, target_summary.analysis_run.id)
|
||||||
|
|
||||||
|
target_capability_names = {
|
||||||
|
capability.name
|
||||||
|
for ability in target_graph.abilities
|
||||||
|
for capability in ability.capabilities
|
||||||
|
}
|
||||||
|
assert "Enforce OIDC PKCE Profiles" in target_capability_names
|
||||||
|
assert "Route LLM Requests Across Providers" not in target_capability_names
|
||||||
|
assert all(fact.repository_id == target.id for fact in target_facts)
|
||||||
|
assert all(chunk.repository_id == target.id for chunk in target_chunks)
|
||||||
|
assert all(ref.path != "providers.py" for ability in target_graph.abilities for ref in ability.source_refs)
|
||||||
|
assert service.ability_map(target.id).abilities == []
|
||||||
|
|
||||||
|
|
||||||
def test_analyze_repository_can_disable_optional_llm_extractor(tmp_path):
|
def test_analyze_repository_can_disable_optional_llm_extractor(tmp_path):
|
||||||
@@ -695,8 +876,9 @@ def test_analyze_repository_normalizes_duplicate_llm_candidates(tmp_path):
|
|||||||
summary = service.analyze_repository(repository.id)
|
summary = service.analyze_repository(repository.id)
|
||||||
graph = service.candidate_graph(repository.id, summary.analysis_run.id)
|
graph = service.candidate_graph(repository.id, summary.analysis_run.id)
|
||||||
|
|
||||||
assert len(graph.abilities) == 1
|
assert len(graph.abilities) == 2
|
||||||
assert graph.abilities[0].name == "LLM Provider Integrations"
|
assert graph.abilities[0].name == "LLM Provider Integrations"
|
||||||
|
assert graph.abilities[1].name == "Support OpenRouter Providers"
|
||||||
|
|
||||||
|
|
||||||
def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):
|
def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ type: workplan
|
|||||||
title: "Provenance-Aware Characteristic Rebuild"
|
title: "Provenance-Aware Characteristic Rebuild"
|
||||||
domain: capabilities
|
domain: capabilities
|
||||||
repo: repo-scoping
|
repo: repo-scoping
|
||||||
status: active
|
status: done
|
||||||
owner: codex
|
owner: codex
|
||||||
topic_slug: foerster-capabilities
|
topic_slug: foerster-capabilities
|
||||||
created: "2026-05-01"
|
created: "2026-05-01"
|
||||||
@@ -195,7 +195,7 @@ Acceptance criteria:
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: RREG-WP-0009-T07
|
id: RREG-WP-0009-T07
|
||||||
status: todo
|
status: done
|
||||||
priority: medium
|
priority: medium
|
||||||
state_hub_task_id: "7afd6550-e4a4-4a8a-94bf-d974b0ccb8d2"
|
state_hub_task_id: "7afd6550-e4a4-4a8a-94bf-d974b0ccb8d2"
|
||||||
```
|
```
|
||||||
@@ -216,7 +216,7 @@ Acceptance criteria:
|
|||||||
|
|
||||||
```task
|
```task
|
||||||
id: RREG-WP-0009-T08
|
id: RREG-WP-0009-T08
|
||||||
status: todo
|
status: done
|
||||||
priority: high
|
priority: high
|
||||||
state_hub_task_id: "05077f3d-d40d-45fd-865c-0924407beb4f"
|
state_hub_task_id: "05077f3d-d40d-45fd-865c-0924407beb4f"
|
||||||
```
|
```
|
||||||
@@ -256,3 +256,24 @@ Acceptance criteria:
|
|||||||
analysis while preserving approved characteristics.
|
analysis while preserving approved characteristics.
|
||||||
- key-cape is documented as the motivating failure mode without hard-coding
|
- key-cape is documented as the motivating failure mode without hard-coding
|
||||||
product-specific behavior into the scanner.
|
product-specific behavior into the scanner.
|
||||||
|
|
||||||
|
## Cross-Repository Analysis Isolation
|
||||||
|
|
||||||
|
```task
|
||||||
|
id: RREG-WP-0009-T10
|
||||||
|
status: done
|
||||||
|
priority: high
|
||||||
|
```
|
||||||
|
|
||||||
|
Validate that analyzing one repository never depends on approved maps,
|
||||||
|
candidate graphs, facts, chunks, or derived scope data from any other
|
||||||
|
repository in the registry database.
|
||||||
|
|
||||||
|
Acceptance criteria:
|
||||||
|
- A repository with stale approved characteristics cannot influence fresh
|
||||||
|
candidate generation for another repository.
|
||||||
|
- Candidate graph, observed fact, and content chunk lookups remain scoped by
|
||||||
|
repository and analysis run.
|
||||||
|
- Tests cover a poisoned-repo scenario where repo A contains old LLM/provider
|
||||||
|
characteristics and repo B still generates only its own repository-owned
|
||||||
|
candidates.
|
||||||
|
|||||||
Reference in New Issue
Block a user