diff --git a/src/repo_registry/candidate_graph/generator.py b/src/repo_registry/candidate_graph/generator.py
index af4da2b..04399fd 100644
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -125,16 +125,7 @@ class CandidateGraphGenerator:
         docs: list[ObservedFact],
         chunks: list[ContentChunk],
     ) -> CandidateCapabilityDraft:
-        features = [
-            CandidateFeatureDraft(
-                name=self._feature_name(fact, chunks),
-                type=self._feature_type(fact),
-                location=fact.path,
-                confidence=0.65 if fact.value else 0.45,
-                source_refs=self._source_refs([fact]),
-            )
-            for fact in interfaces
-        ]
+        features = self._interface_features(interfaces, chunks)
         return CandidateCapabilityDraft(
             name="Expose Repository Interface",
             description=self._interface_description(chunks),
@@ -151,6 +142,83 @@ class CandidateGraphGenerator:
             evidence=self._evidence(tests, examples, docs),
         )
 
+    def _interface_features(
+        self,
+        interfaces: list[ObservedFact],
+        chunks: list[ContentChunk],
+    ) -> list[CandidateFeatureDraft]:
+        by_type: dict[str, list[ObservedFact]] = {}
+        for fact in interfaces:
+            by_type.setdefault(self._feature_type(fact), []).append(fact)
+
+        features: list[CandidateFeatureDraft] = []
+        for feature_type, facts in sorted(by_type.items()):
+            if len(facts) == 1:
+                fact = facts[0]
+                features.append(
+                    CandidateFeatureDraft(
+                        name=self._feature_name(fact, chunks),
+                        type=feature_type,
+                        location=fact.path,
+                        confidence=0.65 if fact.value else 0.45,
+                        source_refs=self._source_refs([fact]),
+                    )
+                )
+                continue
+
+            features.append(
+                CandidateFeatureDraft(
+                    name=self._grouped_interface_feature_name(
+                        feature_type,
+                        facts,
+                        chunks,
+                    ),
+                    type=feature_type,
+                    location=self._grouped_location(facts),
+                    confidence=self._grouped_interface_confidence(facts),
+                    source_refs=self._source_refs(facts),
+                )
+            )
+        return features
+
+    def _grouped_interface_feature_name(
+        self,
+        feature_type: str,
+        facts: list[ObservedFact],
+        chunks: list[ContentChunk],
+    ) -> str:
+        summary = self._grouped_interface_summary(facts, chunks)
+        if feature_type == "API":
+            return f"HTTP API surface: {summary}"
+        if feature_type == "CLI":
+            return f"CLI command surface: {summary}"
+        return f"Callable interface surface: {summary}"
+
+    def _grouped_interface_summary(
+        self,
+        facts: list[ObservedFact],
+        chunks: list[ContentChunk],
+    ) -> str:
+        names = [self._feature_name(fact, chunks) for fact in facts]
+        compact_names = [name for name in names if name]
+        if not compact_names:
+            return f"{len(facts)} entry points"
+        visible = compact_names[:3]
+        suffix = f", +{len(compact_names) - 3} more" if len(compact_names) > 3 else ""
+        return f"{', '.join(visible)}{suffix}"
+
+    def _grouped_location(self, facts: list[ObservedFact]) -> str:
+        paths = sorted({fact.path for fact in facts if fact.path})
+        if not paths:
+            return ""
+        if len(paths) == 1:
+            return paths[0]
+        return "multiple files"
+
+    def _grouped_interface_confidence(self, facts: list[ObservedFact]) -> float:
+        valued = sum(1 for fact in facts if fact.value)
+        return 0.7 if valued == len(facts) else 0.55
+
     def _evidence(
         self,
         tests: list[ObservedFact],
diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py
index 2b4c955..e86525e 100644
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -120,6 +120,7 @@ class RegistryService:
         repository_id: int,
         *,
         source_path: str | None = None,
+        use_cached_checkout: bool = False,
         access_username: str | None = None,
         access_password: str | None = None,
     ) -> ScanSummary:
@@ -134,12 +135,20 @@ class RegistryService:
         )
         try:
             if source_path is None:
-                checkout = self.ingestion.resolve(
-                    repository.url,
-                    branch=repository.branch,
-                    access_username=access_username,
-                    access_password=access_password,
-                )
+                if use_cached_checkout:
+                    checkout = self.ingestion.cached_checkout(repository.url)
+                    if checkout is None:
+                        raise RuntimeError(
+                            "cached checkout was requested, but no checkout exists "
+                            "for this repository"
+                        )
+                else:
+                    checkout = self.ingestion.resolve(
+                        repository.url,
+                        branch=repository.branch,
+                        access_username=access_username,
+                        access_password=access_password,
+                    )
                 scan_source = checkout.source_path
             else:
                 scan_source = source_path
diff --git a/src/repo_registry/repo_ingestion/git.py b/src/repo_registry/repo_ingestion/git.py
index d7b7fd5..374e3a7 100644
--- a/src/repo_registry/repo_ingestion/git.py
+++ b/src/repo_registry/repo_ingestion/git.py
@@ -57,6 +57,16 @@ class GitIngestionService:
             )
         return Checkout(source_path=checkout_path.resolve(), was_cloned=True)
 
+    def cached_checkout(self, url_or_path: str) -> Checkout | None:
+        local_path = self._local_path(url_or_path)
+        if local_path is not None:
+            return Checkout(source_path=local_path.resolve(), was_cloned=False)
+
+        checkout_path = self.checkout_root / self._checkout_key(url_or_path)
+        if not checkout_path.exists():
+            return None
+        return Checkout(source_path=checkout_path.resolve(), was_cloned=True)
+
     def _checkout_branch(
         self,
         checkout_path: Path,
diff --git a/src/repo_registry/web_api/app.py b/src/repo_registry/web_api/app.py
index 0908c79..448b14c 100644
--- a/src/repo_registry/web_api/app.py
+++ b/src/repo_registry/web_api/app.py
@@ -245,6 +245,7 @@ def create_analysis_run(
         summary = service.analyze_repository(
             repository_id,
             source_path=payload.source_path,
+            use_cached_checkout=payload.use_cached_checkout,
             access_username=payload.access_username,
             access_password=payload.access_password,
         )
diff --git a/src/repo_registry/web_api/schemas.py b/src/repo_registry/web_api/schemas.py
index 03d65c1..4800ba3 100644
--- a/src/repo_registry/web_api/schemas.py
+++ b/src/repo_registry/web_api/schemas.py
@@ -202,6 +202,7 @@ class EvidenceUpdate(BaseModel):
 
 class AnalysisRunCreate(BaseModel):
     source_path: str | None = None
+    use_cached_checkout: bool = False
     access_username: str | None = None
     access_password: str | None = Field(default=None, repr=False)
 
@@ -210,6 +211,7 @@ class AnalysisRunCreate(BaseModel):
             "examples": [
                 {},
                 {"source_path": "/path/to/local/repository"},
+                {"use_cached_checkout": True},
                 {
                     "access_username": "git-user",
                     "access_password": "access-token",
diff --git a/src/repo_registry/web_ui/views.py b/src/repo_registry/web_ui/views.py
index 1e381e8..42fe8e8 100644
--- a/src/repo_registry/web_ui/views.py
+++ b/src/repo_registry/web_ui/views.py
@@ -501,6 +501,7 @@ def repository_detail(