diff --git a/README.md b/README.md index 4166662..df4ca72 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Repository Ability Registry maps repositories from usefulness to implementat Ability -> Capability -> Feature -> Evidence -> Code location ``` -The first implementation slice is a Python registry core plus FastAPI HTTP API for manual repository profiles. It deliberately separates the manual/canonical registry path from the later analyzer pipeline. +The first implementation slice is a Python registry core plus FastAPI HTTP API and a small curator UI. Repository registration imports basic metadata from the repository itself, then analysis builds observed facts and candidate review entries. ## Local Development @@ -37,10 +37,10 @@ The API creates a local SQLite database at `var/repo-registry.sqlite3` by defaul ```bash curl -X POST http://127.0.0.1:8000/repos \ -H 'content-type: application/json' \ - -d '{"name":"MailRouter","url":"https://example.com/mail-router.git"}' + -d '{"url":"https://example.com/mail-router.git"}' ``` -Then add abilities, capabilities, features, and evidence under that repository and inspect: +The registry imports name and description from `pyproject.toml`, `package.json`, or README where possible. 
Then add abilities, capabilities, features, and evidence under that repository and inspect:
 
 ```bash
 curl http://127.0.0.1:8000/repos/1/ability-map
diff --git a/pyproject.toml b/pyproject.toml
index 21c5d96..c0ab035 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
     "fastapi>=0.115",
+    "python-multipart>=0.0.20",
     "uvicorn[standard]>=0.30",
     "pydantic-settings>=2.4",
 ]
diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py
index 82fd825..57cd923 100644
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -13,6 +13,7 @@ from repo_registry.core.models import (
 )
 from repo_registry.candidate_graph.generator import CandidateGraphGenerator
 from repo_registry.repo_ingestion.git import GitIngestionService
+from repo_registry.repo_ingestion.metadata import RepositoryMetadataExtractor
 from repo_registry.repo_scanning.scanner import DeterministicScanner
 from repo_registry.storage.sqlite import RegistryStore
 
@@ -28,20 +29,39 @@ class RegistryService:
         self.store = store
         self.scanner = DeterministicScanner()
         self.ingestion = ingestion or GitIngestionService()
+        self.metadata_extractor = RepositoryMetadataExtractor()
         self.candidate_generator = CandidateGraphGenerator()
 
     def register_repository(
         self,
         *,
-        name: str,
         url: str,
+        name: str | None = None,
         description: str | None = None,
         branch: str = "main",
     ) -> Repository:
+        """Register a repository, importing any missing name or description.
+
+        When *name* or *description* is blank, the source at *url* is
+        resolved and its metadata fills the gaps. An explicit *name* is
+        enough to register, so a failed import then only leaves the
+        description empty; without a name the failure is re-raised.
+        """
+        metadata = None
+        if not name or not description:
+            try:
+                checkout = self.ingestion.resolve(url, branch=branch)
+                metadata = self.metadata_extractor.extract(checkout.source_path, url)
+            except (OSError, RuntimeError, ValueError):
+                # NOTE(review): exception types mirror what the HTTP layer
+                # maps to 400; confirm against GitIngestionService.resolve.
+                if not name:
+                    raise
         return self.store.create_repository(
-            name=name,
+            name=name or (metadata.name if metadata is not None else "repository"),
             url=url,
-            description=description,
+            description=description
+            or (metadata.description if metadata is not None else None),
             branch=branch,
         )
 
diff --git a/src/repo_registry/repo_ingestion/metadata.py
b/src/repo_registry/repo_ingestion/metadata.py
new file mode 100644
index 0000000..f60f442
--- /dev/null
+++ b/src/repo_registry/repo_ingestion/metadata.py
@@ -0,0 +1,151 @@
+from __future__ import annotations
+
+import json
+import tomllib
+from dataclasses import dataclass
+from pathlib import Path, PurePosixPath
+from urllib.parse import urlparse
+
+
+@dataclass(frozen=True)
+class RepositoryMetadata:
+    """Name and description imported from a repository checkout."""
+
+    # Display name; "" when the consulted source did not provide one.
+    name: str
+    # Short summary; None when the consulted source did not provide one.
+    description: str | None
+
+
+_NO_METADATA = RepositoryMetadata(name="", description=None)
+
+
+def _text(value: object) -> str | None:
+    """Return *value* stripped when it is a non-blank string, else None."""
+    if isinstance(value, str) and value.strip():
+        return value.strip()
+    return None
+
+
+class RepositoryMetadataExtractor:
+    """Derive a repository's name and description from its checked-out files.
+
+    Sources are consulted in priority order: ``pyproject.toml``, then
+    ``package.json``, then the first usable README. Missing, unreadable, or
+    malformed files are skipped so one bad file cannot abort registration.
+    """
+
+    def extract(self, source_path: str | Path, url: str) -> RepositoryMetadata:
+        """Return metadata for the checkout at *source_path*.
+
+        Name and description are resolved independently, so they may come
+        from different files. When no file yields a name, the last path
+        segment of *url* is used; the description may remain None.
+        """
+        root = Path(source_path)
+        candidates = (
+            self._from_pyproject(root),
+            self._from_package_json(root),
+            self._from_readme(root),
+        )
+        name = next((found.name for found in candidates if found.name), "")
+        description = next(
+            (found.description for found in candidates if found.description),
+            None,
+        )
+        return RepositoryMetadata(
+            name=name or self._name_from_url_or_path(url),
+            description=description,
+        )
+
+    def _from_pyproject(self, root: Path) -> RepositoryMetadata:
+        """Read name/description from the ``[project]`` table, if present."""
+        try:
+            raw = (root / "pyproject.toml").read_text(encoding="utf-8")
+            document = tomllib.loads(raw)
+        except (OSError, ValueError):
+            # ValueError covers both TOMLDecodeError and UnicodeDecodeError.
+            return _NO_METADATA
+        return self._from_mapping(document.get("project"))
+
+    def _from_package_json(self, root: Path) -> RepositoryMetadata:
+        """Read name/description from ``package.json``, if present."""
+        try:
+            raw = (root / "package.json").read_text(encoding="utf-8")
+            document = json.loads(raw)
+        except (OSError, ValueError):
+            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+            return _NO_METADATA
+        return self._from_mapping(document)
+
+    @staticmethod
+    def _from_mapping(table: object) -> RepositoryMetadata:
+        """Pick string ``name``/``description`` values out of a parsed table."""
+        if not isinstance(table, dict):
+            # e.g. a JSON array at top level or ``project = "x"`` in TOML.
+            return _NO_METADATA
+        return RepositoryMetadata(
+            name=_text(table.get("name")) or "",
+            description=_text(table.get("description")),
+        )
+
+    def _from_readme(self, root: Path) -> RepositoryMetadata:
+        """Read a title and summary line from the first usable README.
+
+        The first ``#`` heading becomes the name and the first non-heading
+        line the description. A second heading ends the search so a section
+        title such as "Installation" is never reported as the description.
+        """
+        try:
+            readmes = sorted(
+                (
+                    entry
+                    for entry in root.iterdir()
+                    if entry.name.lower().startswith("readme")
+                ),
+                key=lambda entry: (entry.name.lower(), entry.name),
+            )
+        except OSError:
+            return _NO_METADATA
+        for readme in readmes:
+            if not readme.is_file():
+                continue
+            try:
+                text = readme.read_text(encoding="utf-8", errors="ignore")
+            except OSError:
+                continue
+            title = ""
+            description = None
+            for line in text.splitlines():
+                stripped = line.strip()
+                if not stripped:
+                    continue
+                if not stripped.startswith("#"):
+                    # Keep the text verbatim: only headings carry "#" markup.
+                    description = stripped
+                    break
+                heading = stripped.strip("#").strip()
+                if heading and title:
+                    break
+                title = title or heading
+            if title or description:
+                return RepositoryMetadata(name=title, description=description)
+        return _NO_METADATA
+
+    def _name_from_url_or_path(self, value: str) -> str:
+        """Derive a fallback name from the last segment of a URL or path.
+
+        Handles ``scheme://`` URLs, scp-style ``user@host:path`` remotes, and
+        local paths with either separator. A trailing ``.git`` is dropped and
+        "repository" is returned when nothing usable remains.
+        """
+        parsed = urlparse(value)
+        if parsed.scheme:
+            path = parsed.path
+        else:
+            # Git treats "host:path" as scp-style only when no slash
+            # precedes the first colon; otherwise the colon is path text.
+            host, colon, remainder = value.partition(":")
+            path = remainder if colon and "/" not in host else value
+        name = PurePosixPath(path.replace("\\", "/")).name
+        return name.removesuffix(".git") or "repository"
diff --git a/src/repo_registry/web_api/app.py b/src/repo_registry/web_api/app.py
index 033fa2b..5f69f5c 100644
--- a/src/repo_registry/web_api/app.py
+++ b/src/repo_registry/web_api/app.py
@@ -29,8 +29,8 @@ def get_service(settings: Settings = Depends(get_settings)) -> RegistryService:
 
 
 class RepositoryCreate(BaseModel):
-    name: str
     url: str
+    name: str | None = None
     description: str | None = None
     branch: str = "main"
 
@@ -76,6 +76,11 @@ class CandidateGraphApproval(BaseModel):
 app = FastAPI(title="Repository Ability Registry", version="0.1.0")
 
 
+from repo_registry.web_ui.views import router as ui_router
+
+app.include_router(ui_router)
+
+
 @app.get("/health")
 def health() -> dict[str, str]:
     return {"status": "ok"}
@@ -88,7 +93,7 @@ def create_repository(
 ) -> dict[str, object]:
     try:
         repository = service.register_repository(**payload.model_dump())
-    except ValueError as exc:
+    except (RuntimeError, ValueError) as exc:
         raise HTTPException(status_code=400, detail=str(exc)) from exc
     return asdict(repository)
 
diff --git a/src/repo_registry/web_ui/__init__.py b/src/repo_registry/web_ui/__init__.py
new file mode 100644
index 0000000..fa3e50b
--- /dev/null
+++ b/src/repo_registry/web_ui/__init__.py
@@
-0,0 +1 @@ +"""Small server-rendered curator UI.""" diff --git a/src/repo_registry/web_ui/views.py b/src/repo_registry/web_ui/views.py new file mode 100644 index 0000000..034c4d7 --- /dev/null +++ b/src/repo_registry/web_ui/views.py @@ -0,0 +1,403 @@ +from __future__ import annotations + +from dataclasses import asdict +from html import escape + +from fastapi import APIRouter, Depends, Form +from fastapi.responses import HTMLResponse, RedirectResponse + +from repo_registry.core.service import RegistryService +from repo_registry.web_api.app import get_service + + +router = APIRouter(include_in_schema=False) + + +def page(title: str, body: str) -> HTMLResponse: + return HTMLResponse( + f""" + + +
+ + +| Name | Status | Branch | Source |
|---|---|---|---|
| No repositories yet. | |||
{escape(repository.description or '')}
+{escape(repository.status)} {escape(repository.url)}
+| Run | Status | Started | Error |
|---|---|---|---|
| No runs yet. | |||
| Kind | Name | Path | Value |
|---|---|---|---|
| No observed facts. | |||
No candidates generated.
' + items = [] + for ability in abilities: + capabilities = "".join(render_candidate_capability(capability) for capability in ability["capabilities"]) + items.append( + f""" +{escape(ability['description'])}
+ {render_sources(ability['source_refs'])} +{escape(capability['description'])}
+ {render_sources(capability['source_refs'])} +No approved entries yet.
' + items = [] + for ability in abilities: + capabilities = [] + for capability in ability["capabilities"]: + features = "".join( + f'{escape(capability['description'])}
+{escape(ability['description'])}
+{sources}
" diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py index df10137..df59f65 100644 --- a/tests/test_registry_service.py +++ b/tests/test_registry_service.py @@ -66,6 +66,7 @@ def test_search_matches_approved_abilities_and_capabilities(tmp_path): repository = service.register_repository( name="MailRouter", url="https://example.com/mail-router.git", + description="Manual test repository.", ) ability_id = service.add_ability( repository.id, @@ -87,10 +88,34 @@ def test_search_matches_approved_abilities_and_capabilities(tmp_path): assert results[0].match_name == "Classify Incoming Email" +def test_register_repository_imports_metadata_when_name_is_omitted(tmp_path): + source = tmp_path / "metadata-source" + source.mkdir() + (source / "pyproject.toml").write_text( + '[project]\nname = "metadata-source"\ndescription = "Imported description."\n', + encoding="utf-8", + ) + + service = make_service(tmp_path) + + repository = service.register_repository(url=str(source)) + + assert repository.name == "metadata-source" + assert repository.description == "Imported description." 
+
+
 def test_capability_must_belong_to_repository(tmp_path):
     service = make_service(tmp_path)
-    first = service.register_repository(name="First", url="https://example.com/first.git")
-    second = service.register_repository(name="Second", url="https://example.com/second.git")
+    first = service.register_repository(
+        name="First",
+        url="https://example.com/first.git",
+        description="Manual first repository.",
+    )
+    second = service.register_repository(
+        name="Second",
+        url="https://example.com/second.git",
+        description="Manual second repository.",
+    )
     ability_id = service.add_ability(first.id, name="Document Classification")
 
     try:
@@ -186,6 +211,7 @@ def test_analyze_repository_failure_is_recorded(tmp_path):
     repository = service.register_repository(
         name="Missing",
         url=str(tmp_path / "does-not-exist"),
+        description="Manual missing repository.",
     )
 
     summary = service.analyze_repository(repository.id)
diff --git a/tests/test_repository_metadata.py b/tests/test_repository_metadata.py
new file mode 100644
index 0000000..9ad09dd
--- /dev/null
+++ b/tests/test_repository_metadata.py
@@ -0,0 +1,56 @@
+from repo_registry.repo_ingestion.metadata import RepositoryMetadataExtractor
+
+
+def test_metadata_prefers_pyproject(tmp_path):
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    (repo / "pyproject.toml").write_text(
+        '[project]\nname = "invoice-tools"\ndescription = "Extract invoice data."\n',
+        encoding="utf-8",
+    )
+
+    metadata = RepositoryMetadataExtractor().extract(repo, str(repo))
+
+    assert metadata.name == "invoice-tools"
+    assert metadata.description == "Extract invoice data."
+
+
+def test_metadata_uses_package_json(tmp_path):
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    (repo / "package.json").write_text(
+        '{"name":"frontend-registry","description":"Browse repository abilities."}',
+        encoding="utf-8",
+    )
+
+    metadata = RepositoryMetadataExtractor().extract(repo, str(repo))
+
+    assert metadata.name == "frontend-registry"
+    assert metadata.description == "Browse repository abilities."
+
+
+def test_metadata_falls_back_to_readme_title(tmp_path):
+    repo = tmp_path / "repo-name"
+    repo.mkdir()
+    (repo / "README.md").write_text(
+        "# Useful Registry\n\nExtra details follow.\n",
+        encoding="utf-8",
+    )
+
+    metadata = RepositoryMetadataExtractor().extract(repo, str(repo))
+
+    assert metadata.name == "Useful Registry"
+    assert metadata.description == "Extra details follow."
+
+
+def test_metadata_falls_back_to_url_name(tmp_path):
+    """With no metadata files, the name comes from the last URL segment."""
+    repo = tmp_path / "checkout"
+    repo.mkdir()
+
+    metadata = RepositoryMetadataExtractor().extract(
+        repo, "https://example.com/mail-router.git"
+    )
+
+    assert metadata.name == "mail-router"
+    assert metadata.description is None
diff --git a/tests/test_web_api.py b/tests/test_web_api.py
index 6d656c9..2ff6b44 100644
--- a/tests/test_web_api.py
+++ b/tests/test_web_api.py
@@ -72,6 +72,33 @@ def test_api_manual_registry_loop(tmp_path):
         app.dependency_overrides.clear()
 
 
+def test_api_registers_repository_from_url_metadata(tmp_path):
+    source = tmp_path / "metadata-api"
+    source.mkdir()
+    (source / "package.json").write_text(
+        '{"name":"metadata-api","description":"Imported through the API."}',
+        encoding="utf-8",
+    )
+
+    def override_settings():
+        return Settings(
+            database_path=str(tmp_path / "metadata-api.sqlite3"),
+            checkout_root=str(tmp_path / "metadata-api-checkouts"),
+        )
+
+    app.dependency_overrides[get_settings] = override_settings
+    client = TestClient(app)
+    try:
+        response = client.post("/repos", json={"url": str(source)})
+
+        assert response.status_code == 201
+        repository = response.json()
+        assert repository["name"] == "metadata-api"
+        assert repository["description"] == "Imported through the API."
+    finally:
+        app.dependency_overrides.clear()
+
+
 def test_api_analysis_run_loop(tmp_path):
     source = tmp_path / "repo"
     source.mkdir()
@@ -139,3 +166,70 @@ def test_api_analysis_run_loop(tmp_path):
         assert ("framework", "Vite", "package.json") in fact_names
     finally:
         app.dependency_overrides.clear()
+
+
+def test_ui_register_analyze_and_approve_loop(tmp_path):
+    source = tmp_path / "repo"
+    source.mkdir()
+    (source / "README.md").write_text("# UI Repo\n", encoding="utf-8")
+    (source / "app.py").write_text(
+        "from fastapi import FastAPI\n"
+        "app = FastAPI()\n"
+        '@app.get("/status")\n'
+        "def status():\n"
+        " return {}\n",
+        encoding="utf-8",
+    )
+
+    def override_settings():
+        return Settings(
+            database_path=str(tmp_path / "ui.sqlite3"),
+            checkout_root=str(tmp_path / "ui-checkouts"),
+        )
+
+    app.dependency_overrides[get_settings] = override_settings
+    client = TestClient(app)
+    try:
+        index_response = client.get("/ui")
+        assert index_response.status_code == 200
+        assert "Register Repository" in index_response.text
+
+        create_response = client.post(
+            "/ui/repos",
+            data={
+                "url": str(source),
+                "branch": "main",
+            },
+            follow_redirects=False,
+        )
+        assert create_response.status_code == 303
+        repository_path = create_response.headers["location"]
+
+        detail_response = client.get(repository_path)
+        assert detail_response.status_code == 200
+        assert "Run Analysis" in detail_response.text
+
+        run_response = client.post(
+            f"{repository_path}/analysis-runs",
+            data={"source_path": ""},
+            follow_redirects=False,
+        )
+        assert run_response.status_code == 303
+        run_path = run_response.headers["location"]
+
+        run_detail = client.get(run_path)
+        assert run_detail.status_code == 200
+        assert "Candidate Graph" in run_detail.text
+
+        approve_response = client.post(
+            f"{run_path}/candidate-graph/approve",
+            follow_redirects=False,
+        )
+        assert approve_response.status_code == 303
+
+        approved_detail = client.get(approve_response.headers["location"])
+        assert approved_detail.status_code == 200
+        assert "Approved Ability Map" in approved_detail.text
+        assert "Review UI Repo Repository Usefulness" in approved_detail.text
+    finally:
+        app.dependency_overrides.clear()