generated from coulomb/repo-seed
Repo stats and features as aggregates
This commit is contained in:
@@ -125,16 +125,7 @@ class CandidateGraphGenerator:
|
||||
docs: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
) -> CandidateCapabilityDraft:
|
||||
features = [
|
||||
CandidateFeatureDraft(
|
||||
name=self._feature_name(fact, chunks),
|
||||
type=self._feature_type(fact),
|
||||
location=fact.path,
|
||||
confidence=0.65 if fact.value else 0.45,
|
||||
source_refs=self._source_refs([fact]),
|
||||
)
|
||||
for fact in interfaces
|
||||
]
|
||||
features = self._interface_features(interfaces, chunks)
|
||||
return CandidateCapabilityDraft(
|
||||
name="Expose Repository Interface",
|
||||
description=self._interface_description(chunks),
|
||||
@@ -151,6 +142,83 @@ class CandidateGraphGenerator:
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
)
|
||||
|
||||
def _interface_features(
|
||||
self,
|
||||
interfaces: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
) -> list[CandidateFeatureDraft]:
|
||||
by_type: dict[str, list[ObservedFact]] = {}
|
||||
for fact in interfaces:
|
||||
by_type.setdefault(self._feature_type(fact), []).append(fact)
|
||||
|
||||
features: list[CandidateFeatureDraft] = []
|
||||
for feature_type, facts in sorted(by_type.items()):
|
||||
if len(facts) == 1:
|
||||
fact = facts[0]
|
||||
features.append(
|
||||
CandidateFeatureDraft(
|
||||
name=self._feature_name(fact, chunks),
|
||||
type=feature_type,
|
||||
location=fact.path,
|
||||
confidence=0.65 if fact.value else 0.45,
|
||||
source_refs=self._source_refs([fact]),
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
features.append(
|
||||
CandidateFeatureDraft(
|
||||
name=self._grouped_interface_feature_name(
|
||||
feature_type,
|
||||
facts,
|
||||
chunks,
|
||||
),
|
||||
type=feature_type,
|
||||
location=self._grouped_location(facts),
|
||||
confidence=self._grouped_interface_confidence(facts),
|
||||
source_refs=self._source_refs(facts),
|
||||
)
|
||||
)
|
||||
return features
|
||||
|
||||
def _grouped_interface_feature_name(
|
||||
self,
|
||||
feature_type: str,
|
||||
facts: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
) -> str:
|
||||
summary = self._grouped_interface_summary(facts, chunks)
|
||||
if feature_type == "API":
|
||||
return f"HTTP API surface: {summary}"
|
||||
if feature_type == "CLI":
|
||||
return f"CLI command surface: {summary}"
|
||||
return f"Callable interface surface: {summary}"
|
||||
|
||||
def _grouped_interface_summary(
|
||||
self,
|
||||
facts: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
) -> str:
|
||||
names = [self._feature_name(fact, chunks) for fact in facts]
|
||||
compact_names = [name for name in names if name]
|
||||
if not compact_names:
|
||||
return f"{len(facts)} entry points"
|
||||
visible = compact_names[:3]
|
||||
suffix = f", +{len(compact_names) - 3} more" if len(compact_names) > 3 else ""
|
||||
return f"{', '.join(visible)}{suffix}"
|
||||
|
||||
def _grouped_location(self, facts: list[ObservedFact]) -> str:
|
||||
paths = sorted({fact.path for fact in facts if fact.path})
|
||||
if not paths:
|
||||
return ""
|
||||
if len(paths) == 1:
|
||||
return paths[0]
|
||||
return "multiple files"
|
||||
|
||||
def _grouped_interface_confidence(self, facts: list[ObservedFact]) -> float:
|
||||
valued = sum(1 for fact in facts if fact.value)
|
||||
return 0.7 if valued == len(facts) else 0.55
|
||||
|
||||
def _evidence(
|
||||
self,
|
||||
tests: list[ObservedFact],
|
||||
|
||||
@@ -120,6 +120,7 @@ class RegistryService:
|
||||
repository_id: int,
|
||||
*,
|
||||
source_path: str | None = None,
|
||||
use_cached_checkout: bool = False,
|
||||
access_username: str | None = None,
|
||||
access_password: str | None = None,
|
||||
) -> ScanSummary:
|
||||
@@ -134,12 +135,20 @@ class RegistryService:
|
||||
)
|
||||
try:
|
||||
if source_path is None:
|
||||
checkout = self.ingestion.resolve(
|
||||
repository.url,
|
||||
branch=repository.branch,
|
||||
access_username=access_username,
|
||||
access_password=access_password,
|
||||
)
|
||||
if use_cached_checkout:
|
||||
checkout = self.ingestion.cached_checkout(repository.url)
|
||||
if checkout is None:
|
||||
raise RuntimeError(
|
||||
"cached checkout was requested, but no checkout exists "
|
||||
"for this repository"
|
||||
)
|
||||
else:
|
||||
checkout = self.ingestion.resolve(
|
||||
repository.url,
|
||||
branch=repository.branch,
|
||||
access_username=access_username,
|
||||
access_password=access_password,
|
||||
)
|
||||
scan_source = checkout.source_path
|
||||
else:
|
||||
scan_source = source_path
|
||||
|
||||
@@ -57,6 +57,16 @@ class GitIngestionService:
|
||||
)
|
||||
return Checkout(source_path=checkout_path.resolve(), was_cloned=True)
|
||||
|
||||
def cached_checkout(self, url_or_path: str) -> Checkout | None:
    """Look up an existing checkout for ``url_or_path``.

    When ``self._local_path`` recognises the argument, that resolved
    path is returned with ``was_cloned=False``. Otherwise the checkout
    directory is ``self.checkout_root / self._checkout_key(url_or_path)``;
    it is returned with ``was_cloned=True`` if it exists, and ``None``
    is returned if it does not.
    """
    local = self._local_path(url_or_path)
    if local is None:
        candidate = self.checkout_root / self._checkout_key(url_or_path)
        if candidate.exists():
            return Checkout(source_path=candidate.resolve(), was_cloned=True)
        return None
    return Checkout(source_path=local.resolve(), was_cloned=False)
|
||||
|
||||
def _checkout_branch(
|
||||
self,
|
||||
checkout_path: Path,
|
||||
|
||||
@@ -245,6 +245,7 @@ def create_analysis_run(
|
||||
summary = service.analyze_repository(
|
||||
repository_id,
|
||||
source_path=payload.source_path,
|
||||
use_cached_checkout=payload.use_cached_checkout,
|
||||
access_username=payload.access_username,
|
||||
access_password=payload.access_password,
|
||||
)
|
||||
|
||||
@@ -202,6 +202,7 @@ class EvidenceUpdate(BaseModel):
|
||||
|
||||
class AnalysisRunCreate(BaseModel):
|
||||
source_path: str | None = None
|
||||
use_cached_checkout: bool = False
|
||||
access_username: str | None = None
|
||||
access_password: str | None = Field(default=None, repr=False)
|
||||
|
||||
@@ -210,6 +211,7 @@ class AnalysisRunCreate(BaseModel):
|
||||
"examples": [
|
||||
{},
|
||||
{"source_path": "/path/to/local/repository"},
|
||||
{"use_cached_checkout": True},
|
||||
{
|
||||
"access_username": "git-user",
|
||||
"access_password": "access-token",
|
||||
|
||||
@@ -501,6 +501,7 @@ def repository_detail(
|
||||
<h2>Run Analysis</h2>
|
||||
<form class="stack" method="post" action="/ui/repos/{repository_id}/analysis-runs">
|
||||
<label>Override source path <input name="source_path" placeholder="Optional local path"></label>
|
||||
<label class="checkbox"><input type="checkbox" name="use_cached_checkout" value="1"> Analyze cached checkout without fetching upstream</label>
|
||||
<label>Username <input name="access_username" autocomplete="username" placeholder="Optional for private HTTP(S) repos"></label>
|
||||
<label>Password or access token <input name="access_password" type="password" autocomplete="current-password" placeholder="Used for this Git operation only"></label>
|
||||
<div class="actions">
|
||||
@@ -516,6 +517,7 @@ def repository_detail(
|
||||
</section>
|
||||
<section class="panel">
|
||||
<h2>Approved Ability Map</h2>
|
||||
{render_graph_counts(asdict(ability_map), facts_count=None)}
|
||||
{render_ability_map(asdict(ability_map), repository_id)}
|
||||
</section>
|
||||
</div>
|
||||
@@ -821,6 +823,7 @@ def delete_evidence_from_form(
|
||||
def create_analysis_run_from_form(
|
||||
repository_id: int,
|
||||
source_path: str = Form(""),
|
||||
use_cached_checkout: str | None = Form(None),
|
||||
access_username: str = Form(""),
|
||||
access_password: str = Form(""),
|
||||
service: RegistryService = Depends(get_service),
|
||||
@@ -828,6 +831,7 @@ def create_analysis_run_from_form(
|
||||
summary = service.analyze_repository(
|
||||
repository_id,
|
||||
source_path=source_path or None,
|
||||
use_cached_checkout=bool(use_cached_checkout),
|
||||
access_username=access_username or None,
|
||||
access_password=access_password or None,
|
||||
)
|
||||
@@ -869,6 +873,7 @@ def analysis_run_detail(
|
||||
<section class="panel">
|
||||
<div class="actions">
|
||||
<h2 style="margin-right:auto">Candidate Graph</h2>
|
||||
{render_graph_counts(asdict(candidate_graph), facts_count=len(facts))}
|
||||
<form method="post" action="/ui/repos/{repository_id}/analysis-runs/{analysis_run_id}/candidate-graph/approve">
|
||||
<button type="submit">Approve</button>
|
||||
</form>
|
||||
@@ -876,7 +881,10 @@ def analysis_run_detail(
|
||||
{render_candidate_graph(asdict(candidate_graph), repository_id, analysis_run_id)}
|
||||
</section>
|
||||
<section class="panel">
|
||||
<h2>Observed Facts</h2>
|
||||
<div class="actions">
|
||||
<h2 style="margin-right:auto">Observed Facts</h2>
|
||||
{render_count_pills(facts=len(facts))}
|
||||
</div>
|
||||
<table>
|
||||
<thead><tr><th>Kind</th><th>Name</th><th>Path</th><th>Value</th></tr></thead>
|
||||
<tbody>{fact_rows or '<tr><td colspan="4" class="muted">No observed facts.</td></tr>'}</tbody>
|
||||
@@ -1565,6 +1573,41 @@ def split_capability_lines(value: str) -> list[str]:
|
||||
return [line.strip() for line in normalized.splitlines() if line.strip()]
|
||||
|
||||
|
||||
def render_graph_counts(graph: dict, facts_count: int | None = None) -> str:
    """Render count pills for the abilities, capabilities and features in ``graph``.

    ``graph`` is read as a dict with an ``"abilities"`` list, where each
    ability may hold ``"capabilities"`` and each capability may hold
    ``"features"``; missing keys count as empty. A ``"facts"`` pill is
    appended only when ``facts_count`` is not ``None``.
    """
    abilities = graph.get("abilities", [])

    capability_total = 0
    feature_total = 0
    for ability in abilities:
        for capability in ability.get("capabilities", []):
            capability_total += 1
            feature_total += sum(1 for _ in capability.get("features", []))

    # Keyword order fixes the pill order: facts, when present, comes last.
    extra: dict[str, int] = {} if facts_count is None else {"facts": facts_count}
    return render_count_pills(
        abilities=len(abilities),
        capabilities=capability_total,
        features=feature_total,
        **extra,
    )
|
||||
|
||||
|
||||
def render_count_pills(**counts: int) -> str:
    """Render each named count as a ``<span class="pill">`` element.

    Pills are emitted in keyword order and joined without a separator,
    so calling with no counts yields an empty string. Known names are
    mapped through the label table; a name missing from the table is
    shown as-is rather than raising ``KeyError``, since the function
    accepts arbitrary keywords.
    """
    labels = {
        "abilities": "abilities",
        "capabilities": "capabilities",
        "features": "features",
        "facts": "facts",
    }
    return "".join(
        f'<span class="pill">{count} {labels.get(name, name)}</span>'
        for name, count in counts.items()
    )
|
||||
|
||||
|
||||
def render_candidate_graph(graph: dict, repository_id: int, analysis_run_id: int) -> str:
|
||||
abilities = graph.get("abilities", [])
|
||||
if not abilities:
|
||||
|
||||
@@ -208,3 +208,39 @@ def test_candidate_generator_uses_generic_io_for_unknown_interfaces():
|
||||
capability = graph[0].capabilities[0]
|
||||
assert capability.inputs == ["caller input"]
|
||||
assert capability.outputs == ["callable interface result"]
|
||||
|
||||
|
||||
def test_candidate_generator_groups_many_interface_facts_into_behavioral_features():
    """Three route facts from one file collapse into a single API feature."""
    repository = Repository(
        id=1,
        name="Registry",
        url="/tmp/registry",
        description=None,
        branch="main",
        status="analyzed",
    )
    route_decorators = [
        '@app.get("/repos")',
        '@app.post("/repos")',
        '@app.post("/repos/{repository_id}/analysis-runs")',
    ]
    observed = [
        fact(1, "documentation", "README", "README.md"),
        # Interface facts take ids 2-4, one per route decorator.
        *(
            fact(fact_id, "interface", "python route decorator", "src/api.py", decorator)
            for fact_id, decorator in enumerate(route_decorators, start=2)
        ),
        fact(5, "test", "test_api.py", "tests/test_api.py"),
    ]

    candidate_graph = CandidateGraphGenerator().generate(repository, observed)

    interface_capability = candidate_graph[0].capabilities[0]
    assert len(interface_capability.features) == 1
    grouped = interface_capability.features[0]
    expected_name = (
        "HTTP API surface: GET /repos, POST /repos, POST /repos/{repository_id}/analysis-runs"
    )
    assert grouped.name == expected_name
    assert grouped.type == "API"
    assert grouped.location == "src/api.py"
    assert len(grouped.source_refs) == 3
|
||||
|
||||
@@ -267,7 +267,6 @@ def test_search_filters_by_status_language_and_framework(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
service = make_service(tmp_path)
|
||||
repository = service.register_repository(name="Filterable", url=str(source))
|
||||
summary = service.analyze_repository(repository.id)
|
||||
@@ -327,7 +326,7 @@ def test_fixture_breadth_python_cli_repo_extracts_reviewable_cli_claims(tmp_path
|
||||
assert summary.analysis_run.status == "completed"
|
||||
assert capability.name == "Expose Repository Interface"
|
||||
assert capability.features[0].type == "CLI"
|
||||
assert capability.features[0].name == "CLI command main"
|
||||
assert capability.features[0].name.startswith("CLI command surface:")
|
||||
assert capability.evidence[0].reference == "tests/test_cli.py"
|
||||
assert service.ability_map(repository.id).abilities == []
|
||||
|
||||
@@ -523,7 +522,6 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
service = make_service(tmp_path)
|
||||
repository = service.register_repository(
|
||||
name="Example",
|
||||
@@ -660,6 +658,13 @@ def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "cli.py").write_text(
|
||||
"import click\n\n"
|
||||
"@click.command()\n"
|
||||
"def health():\n"
|
||||
" click.echo('ok')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
service = make_service(tmp_path)
|
||||
repository = service.register_repository(name="Example", url=str(source))
|
||||
@@ -1150,6 +1155,13 @@ def test_merge_candidate_feature_and_evidence_omits_duplicate_leaves(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "cli.py").write_text(
|
||||
"import click\n\n"
|
||||
"@click.command()\n"
|
||||
"def health():\n"
|
||||
" click.echo('ok')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
service = make_service(tmp_path)
|
||||
repository = service.register_repository(name="Merge Leaves", url=str(source))
|
||||
@@ -1224,6 +1236,36 @@ def test_analyze_repository_clones_git_url_before_scanning(tmp_path):
|
||||
assert ("framework", "pytest", "requirements.txt") in fact_names
|
||||
|
||||
|
||||
def test_analyze_repository_can_use_cached_checkout_without_fetching(tmp_path, monkeypatch):
    """Analysing with ``use_cached_checkout=True`` scans the existing checkout and never runs git."""
    service = make_service(tmp_path)
    remote_url = "https://example.com/private/repo.git"

    # Pre-create the checkout directory the service is expected to find.
    cached_dir = tmp_path / "checkouts" / "repo-b5d250ec3c59"
    cached_dir.mkdir(parents=True)
    readme = cached_dir / "README.md"
    readme.write_text("# Cached Repo\n", encoding="utf-8")

    def refuse_git(*args, **kwargs):
        raise AssertionError("cached analysis should not run git")

    monkeypatch.setattr(service.ingestion, "_run_git", refuse_git)
    repository = service.register_repository(
        name="Cached",
        url=remote_url,
        description="Already cloned.",
    )

    summary = service.analyze_repository(repository.id, use_cached_checkout=True)

    assert summary.analysis_run.status == "completed"
    assert summary.snapshot is not None
    assert summary.snapshot.source_path == str(cached_dir)
    recorded = {(item.kind, item.name, item.path) for item in summary.facts}
    assert ("documentation", "README", "README.md") in recorded
|
||||
|
||||
|
||||
def test_operational_logging_records_analysis_and_review_events(caplog, tmp_path):
|
||||
source = tmp_path / "repo"
|
||||
source.mkdir()
|
||||
|
||||
@@ -1031,7 +1031,10 @@ def test_api_source_linked_candidate_and_repo_update_loop(tmp_path):
|
||||
for capability in ability["capabilities"]
|
||||
for feature in capability["features"]
|
||||
}
|
||||
assert {"GET /status", "GET /ready"} <= second_features
|
||||
assert any(
|
||||
"GET /status" in feature_name and "GET /ready" in feature_name
|
||||
for feature_name in second_features
|
||||
)
|
||||
|
||||
approved_after_reanalysis = client.get(f"/repos/{repository_id}/ability-map")
|
||||
assert approved_after_reanalysis.status_code == 200
|
||||
@@ -1060,6 +1063,13 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "cli.py").write_text(
|
||||
"import click\n\n"
|
||||
"@click.command()\n"
|
||||
"def status():\n"
|
||||
" click.echo('ok')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def override_settings():
|
||||
return Settings(
|
||||
@@ -1095,6 +1105,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
|
||||
assert detail_response.status_code == 200
|
||||
assert "Run Analysis" in detail_response.text
|
||||
assert "Running analysis..." in detail_response.text
|
||||
assert "Analyze cached checkout without fetching upstream" in detail_response.text
|
||||
assert "Repository Metadata" in detail_response.text
|
||||
|
||||
edit_repository_response = client.post(
|
||||
@@ -1127,6 +1138,10 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
|
||||
run_detail = client.get(run_path)
|
||||
assert run_detail.status_code == 200
|
||||
assert "Candidate Graph" in run_detail.text
|
||||
assert "1 abilities" in run_detail.text
|
||||
assert "2 capabilities" in run_detail.text
|
||||
assert "2 features" in run_detail.text
|
||||
assert "7 facts" in run_detail.text
|
||||
assert "Content Chunks" in run_detail.text
|
||||
assert "README.md:1-1" in run_detail.text
|
||||
assert "ID " in run_detail.text
|
||||
@@ -1141,6 +1156,9 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
|
||||
approved_detail = client.get(approve_response.headers["location"])
|
||||
assert approved_detail.status_code == 200
|
||||
assert "Approved Ability Map" in approved_detail.text
|
||||
assert "1 abilities" in approved_detail.text
|
||||
assert "2 capabilities" in approved_detail.text
|
||||
assert "2 features" in approved_detail.text
|
||||
assert "Review UI Repo Edited Repository Usefulness" in approved_detail.text
|
||||
assert "Language: Python" in approved_detail.text
|
||||
assert "Framework: FastAPI" in approved_detail.text
|
||||
@@ -1635,7 +1653,6 @@ def test_api_rejects_candidate_capability_feature_and_evidence(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def override_settings():
|
||||
return Settings(
|
||||
database_path=str(tmp_path / "api-reject.sqlite3"),
|
||||
@@ -1723,7 +1740,6 @@ def test_api_relinks_candidate_feature_and_evidence(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def override_settings():
|
||||
return Settings(
|
||||
database_path=str(tmp_path / "api-relink.sqlite3"),
|
||||
@@ -1798,6 +1814,13 @@ def test_api_merges_candidate_capability_feature_and_evidence(tmp_path):
|
||||
" return {}\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "cli.py").write_text(
|
||||
"import click\n\n"
|
||||
"@click.command()\n"
|
||||
"def status_cli():\n"
|
||||
" click.echo('ok')\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def override_settings():
|
||||
return Settings(
|
||||
|
||||
@@ -45,7 +45,7 @@ analysis run in one flow and land on the reviewable candidate graph.
|
||||
|
||||
```task
|
||||
id: RREG-WP-0003-T02
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "d0d98e1b-8d21-4bdf-af58-edbb34e8a929"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user