generated from coulomb/repo-seed
Cross repo isolation
This commit is contained in:
@@ -188,7 +188,7 @@ class CandidateGraphGenerator:
|
||||
),
|
||||
source_refs=self._source_refs(interfaces),
|
||||
primary_class="interface",
|
||||
attributes=self._interface_attributes(interfaces),
|
||||
attributes=self._interface_attributes(interfaces, docs, chunks),
|
||||
features=features,
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
)
|
||||
@@ -523,10 +523,33 @@ class CandidateGraphGenerator:
|
||||
attributes.append("interface")
|
||||
return "developer-tooling", self._unique(attributes)
|
||||
|
||||
def _interface_attributes(
    self,
    interfaces: list[ObservedFact],
    docs: list[ObservedFact] | None = None,
    chunks: list[ContentChunk] | None = None,
) -> list[str]:
    """Return the attribute tags for an interface candidate.

    Args:
        interfaces: Interface facts; each is mapped through
            ``self._feature_type`` to an "api", "cli" or "callable" tag.
        docs: Documentation facts handed to
            ``self._interface_utility_relationship``; ``None`` means none.
        chunks: Content chunks handed to the same helper; ``None`` means
            none.

    Returns:
        ``"surface"``, the per-type tags, then ``"utility-<relationship>"``,
        passed through ``self._unique`` (presumably an order-preserving
        de-duplication -- confirm against its definition).
    """
    # Walk the distinct feature types in sorted order. Iterating the set
    # directly yields a different order from one process to the next
    # (string hashing is randomised), which made the tag order unstable.
    feature_types = sorted(
        {self._feature_type(fact) for fact in interfaces},
        key=str,
    )
    attributes = [
        "api" if item == "API" else "cli" if item == "CLI" else "callable"
        for item in feature_types
    ]
    utility = self._interface_utility_relationship(docs or [], chunks or [])
    return self._unique(["surface", *attributes, f"utility-{utility}"])
|
||||
|
||||
def _interface_utility_relationship(
    self,
    docs: list[ObservedFact],
    chunks: list[ContentChunk],
) -> str:
    """Classify the interface as a "facade" over something else or "owned".

    Only chunks whose path belongs to one of ``docs``, whose kind is
    "intent" or "documentation", and whose ``source_role`` metadata is not
    "derived_scope" are consulted. Their lower-cased text is searched for
    facade-style wording.

    Returns:
        ``"facade"`` when any marker word is found, otherwise ``"owned"``.
    """
    documented_paths = {fact.path for fact in docs}
    fragments = []
    for chunk in chunks:
        if chunk.path not in documented_paths:
            continue
        if chunk.kind not in {"intent", "documentation"}:
            continue
        if chunk.metadata.get("source_role") == "derived_scope":
            continue
        fragments.append(chunk.text.lower())

    # Markers are matched against the space-joined text, not per chunk.
    corpus = " ".join(fragments)
    for marker in ("facade", "proxy", "wrapper", "wraps "):
        if marker in corpus:
            return "facade"
    return "owned"
|
||||
|
||||
def _feature_attributes(
|
||||
self,
|
||||
|
||||
157
src/repo_registry/cli.py
Normal file
157
src/repo_registry/cli.py
Normal file
@@ -0,0 +1,157 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from repo_registry.core.models import CharacteristicRebuildResult, Repository
|
||||
from repo_registry.core.service import RegistryService
|
||||
from repo_registry.llm_extraction import LLMCandidateExtractor, create_llm_connect_adapter
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
||||
from repo_registry.web_api.app import Settings
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Build the ``repo-scoping`` command-line parser.

    The parser has one required sub-command, ``rebuild-characteristics``,
    which must be given exactly one of ``--repo`` or ``--all``.
    """
    root = argparse.ArgumentParser(
        prog="repo-scoping",
        description="Repository Scoping maintenance commands.",
    )
    commands = root.add_subparsers(dest="command", required=True)
    rebuild_cmd = commands.add_parser(
        "rebuild-characteristics",
        help="Rebuild candidate characteristics for one or more repositories.",
    )

    # Target selection: exactly one of --repo / --all must be supplied.
    scope = rebuild_cmd.add_mutually_exclusive_group(required=True)
    scope.add_argument("--repo", help="Repository id or exact repository name.")
    scope.add_argument("--all", action="store_true", help="Rebuild every repository.")

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        ("--dry-run", "Preview without clearing approved characteristics."),
        ("--no-llm", "Disable configured LLM assistance."),
        ("--trusted-auto-approve", "Run trusted auto-approval after a confirmed rebuild."),
        ("--confirm", "Confirm a destructive rebuild for selected repositories."),
        ("--confirm-all", "Confirm a destructive all-repository rebuild."),
    )
    for flag, description in switches:
        rebuild_cmd.add_argument(flag, action="store_true", help=description)

    # Options that override values otherwise taken from settings.
    overrides = (
        ("--database-path", "Override REPO_REGISTRY_DATABASE_PATH."),
        ("--checkout-root", "Override REPO_REGISTRY_CHECKOUT_ROOT."),
    )
    for flag, description in overrides:
        rebuild_cmd.add_argument(flag, help=description)

    return root
|
||||
|
||||
|
||||
def main(argv: Sequence[str] | None = None) -> int:
    """Parse ``argv`` and run the selected sub-command.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The sub-command's exit status.
    """
    parser = build_parser()
    namespace = parser.parse_args(argv)
    if namespace.command != "rebuild-characteristics":
        # parser.error() exits the process; the return only satisfies the
        # declared return type should that ever change.
        parser.error(f"unknown command: {namespace.command}")
        return 2
    return rebuild_characteristics_command(namespace, parser)
|
||||
|
||||
|
||||
def rebuild_characteristics_command(
    args: argparse.Namespace,
    parser: argparse.ArgumentParser,
) -> int:
    """Run ``rebuild-characteristics`` for every selected repository.

    A non-dry-run rebuild is destructive, so it must be confirmed:
    ``--all`` needs ``--confirm-all``; otherwise ``--confirm`` or
    ``--confirm-all`` is accepted. One summary line is printed per
    repository.

    Args:
        args: Parsed arguments of the sub-command.
        parser: Parser used to report usage errors.

    Returns:
        ``0`` once all selected repositories have been processed.
    """
    dry_run = bool(args.dry_run)
    if not dry_run:
        if args.all and not args.confirm_all:
            parser.error("--all destructive rebuilds require --confirm-all")
        if not (args.confirm or args.confirm_all):
            parser.error("destructive rebuilds require --confirm or --confirm-all")

    service = service_from_args(args)
    targets = selected_repositories(service, args)
    if not targets:
        parser.error("no repositories matched the requested target")

    # Auto-approval only ever follows a real (non-preview) rebuild.
    auto_approve = args.trusted_auto_approve and not dry_run
    llm_allowed = not args.no_llm
    for repository in targets:
        result = service.rebuild_characteristics_from_scratch(
            repository.id,
            dry_run=dry_run,
            confirm=not dry_run,
            use_llm_assistance=llm_allowed,
        )
        if auto_approve and result.analysis_run.status == "completed":
            service.trusted_auto_approve_candidate_graph(
                repository.id,
                result.analysis_run.id,
                notes="CLI trusted auto-approve after rebuild.",
            )
        print(rebuild_summary_line(service, result, args))
    return 0
|
||||
|
||||
|
||||
def service_from_args(args: argparse.Namespace) -> RegistryService:
    """Assemble a ``RegistryService`` from settings and CLI overrides.

    ``--database-path`` and ``--checkout-root`` take precedence over the
    corresponding settings values. The database's parent directory is
    created if missing and the store is initialized before use. An LLM
    extractor is attached only when ``--no-llm`` is absent and settings
    enable LLM use and name a provider.
    """
    settings = Settings()

    database_path = Path(args.database_path or settings.database_path)
    database_path.parent.mkdir(parents=True, exist_ok=True)
    store = RegistryStore(database_path)
    store.initialize()

    extractor = None
    llm_wanted = not args.no_llm and settings.llm_enabled and settings.llm_provider
    if llm_wanted:
        extractor = LLMCandidateExtractor(
            create_llm_connect_adapter(settings.llm_provider, model=settings.llm_model)
        )

    ingestion = GitIngestionService(args.checkout_root or settings.checkout_root)
    return RegistryService(store, ingestion=ingestion, llm_extractor=extractor)
|
||||
|
||||
|
||||
def selected_repositories(
    service: RegistryService,
    args: argparse.Namespace,
) -> list[Repository]:
    """Resolve the ``--all`` / ``--repo`` target to a list of repositories.

    Args:
        service: Registry service used to list or fetch repositories.
        args: Parsed arguments providing ``all`` and ``repo``.

    Returns:
        Every repository for ``--all``; otherwise the repository with the
        given numeric id, or those whose name equals ``--repo`` exactly.
        The list is empty when nothing matches.
    """
    if args.all:
        return service.list_repositories()

    repo = str(args.repo)
    # str.isdecimal() accepts only characters int() can parse.
    # str.isdigit() also accepts digit-like characters such as "²",
    # for which int() raises ValueError.
    if repo.isdecimal():
        try:
            return [service.get_repository(int(repo))]
        except NotFoundError:
            # NOTE(review): a repository whose *name* is purely numeric is
            # not reachable by name here; kept as-is because this selects
            # the target of a destructive command.
            return []

    # List repositories only when a name match is actually required.
    return [
        repository
        for repository in service.list_repositories()
        if repository.name == repo
    ]
|
||||
|
||||
|
||||
def rebuild_summary_line(
    service: RegistryService,
    result: CharacteristicRebuildResult,
    args: argparse.Namespace,
) -> str:
    """Format the one-line ``key=value`` summary printed per repository.

    The review-queue figure counts capabilities still in "candidate"
    status in the run's candidate graph. The graph is fetched only for a
    "completed" analysis run; otherwise the count is 0.
    """
    repository = result.repository
    run = result.analysis_run

    pending_review = 0
    if run.status == "completed":
        graph = service.candidate_graph(repository.id, run.id)
        if graph is not None:
            for ability in graph.abilities:
                for capability in ability.capabilities:
                    if capability.status == "candidate":
                        pending_review += 1

    source_label = "deterministic" if args.no_llm else "configured"
    fields = (
        f"repo={repository.id}:{repository.name}",
        f"latest_analysis_run={run.id}",
        f"candidate_source={source_label}",
        f"dry_run={result.dry_run}",
        f"cleared_approved={result.cleared_approved}",
        f"approved_superseded={result.previous_counts}",
        f"candidates={result.candidate_counts}",
        f"remaining_review_queue={pending_review}",
    )
    return " ".join(fields)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
@@ -215,7 +215,7 @@ class RegistryService:
|
||||
candidate_source = "deterministic"
|
||||
candidates = normalize_candidate_drafts(candidates)
|
||||
self.store.replace_candidate_graph(repository_id, completed_run.id, candidates)
|
||||
if candidate_source == "llm":
|
||||
if "llm" in candidate_source:
|
||||
log_operation(
|
||||
"llm_extraction_used",
|
||||
repository_id=repository_id,
|
||||
@@ -226,7 +226,10 @@ class RegistryService:
|
||||
repository_id,
|
||||
completed_run.id,
|
||||
action="llm_extraction_used",
|
||||
notes=f"Generated {len(candidates)} candidate ability draft(s).",
|
||||
notes=(
|
||||
f"Generated {len(candidates)} candidate ability draft(s) "
|
||||
f"from {candidate_source} candidate generation."
|
||||
),
|
||||
)
|
||||
if trusted_auto_approve:
|
||||
self.trusted_auto_approve_candidate_graph(
|
||||
@@ -260,11 +263,13 @@ class RegistryService:
|
||||
*,
|
||||
use_llm_assistance: bool = True,
|
||||
):
|
||||
deterministic = self.candidate_generator.generate(repository, facts, chunks)
|
||||
if use_llm_assistance and self.llm_extractor is not None:
|
||||
extracted = self.llm_extractor.extract(repository, chunks)
|
||||
if extracted:
|
||||
return self.llm_mapper.map(extracted, facts, chunks), "llm"
|
||||
return self.candidate_generator.generate(repository, facts, chunks), "deterministic"
|
||||
llm_candidates = self.llm_mapper.map(extracted, facts, chunks)
|
||||
return llm_candidates + deterministic, "llm+deterministic"
|
||||
return deterministic, "deterministic"
|
||||
|
||||
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
|
||||
return self.store.list_analysis_runs(repository_id)
|
||||
|
||||
@@ -75,12 +75,20 @@ class LLMCandidateExtractor:
|
||||
chunk_text = "\n\n".join(
|
||||
(
|
||||
f"Source: {chunk.path}:{chunk.start_line}-{chunk.end_line} "
|
||||
f"({chunk.kind})\n{chunk.text}"
|
||||
f"({chunk.kind}; source_role={self._source_role(chunk)})\n{chunk.text}"
|
||||
)
|
||||
for chunk in chunks[:12]
|
||||
for chunk in self._prompt_chunks(chunks)
|
||||
)
|
||||
return (
|
||||
"Extract a conservative, source-linked repository ability map.\n"
|
||||
"Use original repository utility only: capabilities the repository "
|
||||
"owns, intentionally exposes as a facade, or implements as an adapter.\n"
|
||||
"Prefer source_role=intent_summary, product_documentation, "
|
||||
"implementation_source, and test_evidence. Do not use SCOPE.md or "
|
||||
"source_role=derived_scope as primary evidence; it is a derived prior "
|
||||
"registry view and may be stale. Ignore agent guidance, CI/tooling, "
|
||||
"dependency-only, and mention-only context unless owned product "
|
||||
"evidence supports the same claim.\n"
|
||||
"Return strict JSON only with this shape:\n"
|
||||
"{\n"
|
||||
' "abilities": [\n'
|
||||
@@ -108,6 +116,46 @@ class LLMCandidateExtractor:
|
||||
f"{chunk_text}\n"
|
||||
)
|
||||
|
||||
def _prompt_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
    """Select the chunks that are embedded in the extraction prompt.

    Chunks whose source role is "agent_guidance" or "derived_scope" are
    dropped. The rest are ordered by source-role priority, then path, then
    start line, and only the first 12 are kept.
    """
    excluded_roles = {"agent_guidance", "derived_scope"}

    def ordering(chunk):
        priority = self._source_role_priority(self._source_role(chunk))
        return (priority, chunk.path, chunk.start_line)

    selected = []
    for chunk in chunks:
        if self._source_role(chunk) in excluded_roles:
            continue
        selected.append(chunk)

    selected.sort(key=ordering)
    return selected[:12]
|
||||
|
||||
def _source_role(self, chunk: ContentChunk) -> str:
    """Return the chunk's source role, inferring it from the path if unset.

    A non-empty string under ``chunk.metadata["source_role"]`` is returned
    as is. Otherwise the lower-cased path decides:

    * ends with ``intent.md`` -> ``"intent_summary"``
    * ends with ``scope.md`` -> ``"derived_scope"``
    * ends with ``agents.md`` / ``claude.md``, or lies inside a
      ``.claude`` directory -> ``"agent_guidance"``

    Returns:
        The role name, or ``""`` when nothing matches.
    """
    role = chunk.metadata.get("source_role")
    if isinstance(role, str) and role:
        return role

    path = chunk.path.lower()
    if path.endswith("intent.md"):
        return "intent_summary"
    if path.endswith("scope.md"):
        return "derived_scope"
    # A path that starts with ".claude/" has no leading slash, so the
    # "/.claude/" substring test alone would miss a top-level .claude
    # directory.
    in_claude_dir = path.startswith(".claude/") or "/.claude/" in path
    if in_claude_dir or path.endswith(("agents.md", "claude.md")):
        return "agent_guidance"
    return ""
|
||||
|
||||
def _source_role_priority(self, source_role: str) -> int:
    """Return the sort rank of a source role (lower sorts first).

    Known roles rank 0-6 in the order listed below; any other value,
    including the empty string, ranks 7.
    """
    ranked_roles = (
        "intent_summary",
        "product_documentation",
        "implementation_source",
        "test_evidence",
        "configuration",
        "dependency_declaration",
        "ci_tooling",
    )
    if source_role in ranked_roles:
        return ranked_roles.index(source_role)
    # Unknown roles share the rank just past the last known one.
    return len(ranked_roles)
|
||||
|
||||
def parse_response(self, content: str) -> list[ExtractedAbility]:
|
||||
try:
|
||||
payload = json.loads(self._json_text(content))
|
||||
|
||||
Reference in New Issue
Block a user