generated from coulomb/repo-seed
Git ingestion part of Milestone 2
This commit is contained in:
@@ -10,6 +10,7 @@ from repo_registry.core.models import (
|
||||
ScanSummary,
|
||||
SearchResult,
|
||||
)
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.repo_scanning.scanner import DeterministicScanner
|
||||
from repo_registry.storage.sqlite import RegistryStore
|
||||
|
||||
@@ -17,9 +18,14 @@ from repo_registry.storage.sqlite import RegistryStore
|
||||
class RegistryService:
|
||||
"""Application service for the manual registry MVP."""
|
||||
|
||||
def __init__(self, store: RegistryStore) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
store: RegistryStore,
|
||||
ingestion: GitIngestionService | None = None,
|
||||
) -> None:
|
||||
self.store = store
|
||||
self.scanner = DeterministicScanner()
|
||||
self.ingestion = ingestion or GitIngestionService()
|
||||
|
||||
def register_repository(
|
||||
self,
|
||||
@@ -52,7 +58,12 @@ class RegistryService:
|
||||
run = self.store.create_analysis_run(repository_id)
|
||||
self.store.update_repository_status(repository_id, "analyzing")
|
||||
try:
|
||||
scan_result = self.scanner.scan(source_path or repository.url)
|
||||
if source_path is None:
|
||||
checkout = self.ingestion.resolve(repository.url, branch=repository.branch)
|
||||
scan_source = checkout.source_path
|
||||
else:
|
||||
scan_source = source_path
|
||||
scan_result = self.scanner.scan(scan_source)
|
||||
except Exception as exc:
|
||||
failed_run = self.store.fail_analysis_run(repository_id, run.id, str(exc))
|
||||
return ScanSummary(analysis_run=failed_run, snapshot=None, facts=[])
|
||||
|
||||
1
src/repo_registry/repo_ingestion/__init__.py
Normal file
1
src/repo_registry/repo_ingestion/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Repository checkout and ingestion helpers."""
|
||||
76
src/repo_registry/repo_ingestion/git.py
Normal file
76
src/repo_registry/repo_ingestion/git.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Checkout:
    """Immutable result of resolving a repository reference.

    Instances are produced by ``GitIngestionService.resolve``.
    """

    # Directory on disk that holds the repository contents.
    source_path: Path
    # False when an existing local path was used in place; True when the
    # contents live in a checkout managed by the ingestion service.
    was_cloned: bool
|
||||
|
||||
|
||||
class GitIngestionService:
    """Resolve a repository reference to a directory on disk.

    A reference that names an existing local path is used in place. Anything
    else is handed to the ``git`` command-line client and cloned (or, when a
    checkout already exists, fetched) under ``checkout_root``.
    """

    def __init__(self, checkout_root: str | Path = "var/checkouts") -> None:
        """Remember the directory under which remote checkouts are stored."""
        self.checkout_root = Path(checkout_root)

    def resolve(self, url_or_path: str, *, branch: str = "main") -> Checkout:
        """Return a ``Checkout`` for ``url_or_path``.

        Args:
            url_or_path: An existing local path, or a URL understood by git.
            branch: Branch to check out for remote repositories. An empty
                string skips the explicit checkout step.

        Returns:
            A ``Checkout`` with the resolved source directory.
            ``was_cloned`` is False for local paths and True whenever the
            source lives under ``checkout_root`` (including when an existing
            checkout was only refreshed).

        Raises:
            RuntimeError: If git is missing, a git command fails or times
                out, or ``branch`` looks like a command-line option.
        """
        local_path = self._local_path(url_or_path)
        if local_path is not None:
            return Checkout(source_path=local_path.resolve(), was_cloned=False)

        checkout_path = self.checkout_root / self._checkout_key(url_or_path)
        self.checkout_root.mkdir(parents=True, exist_ok=True)
        self._clone_or_fetch(url_or_path, checkout_path)
        self._checkout_branch(checkout_path, branch)
        return Checkout(source_path=checkout_path.resolve(), was_cloned=True)

    def _clone_or_fetch(self, url: str, checkout_path: Path) -> None:
        """Fetch into ``checkout_path`` if it exists, otherwise clone ``url``."""
        if checkout_path.exists():
            self._run_git(["fetch", "--all", "--prune"], cwd=checkout_path)
        else:
            # "--" ends option parsing so a caller-supplied URL that starts
            # with "-" can never be interpreted as a git option.
            self._run_git(["clone", "--", url, str(checkout_path)], cwd=None)

    def _checkout_branch(self, checkout_path: Path, branch: str) -> None:
        """Check out ``branch`` (when given) and fast-forward the work tree."""
        if branch:
            # "git checkout -- <x>" would treat <x> as a path, so the "--"
            # guard cannot be used here; reject option-like names instead.
            if branch.startswith("-"):
                raise RuntimeError(f"invalid branch name: {branch!r}")
            self._run_git(["checkout", branch], cwd=checkout_path)
        self._run_git(["pull", "--ff-only"], cwd=checkout_path)

    def _local_path(self, value: str) -> Path | None:
        """Return ``value`` as a ``Path`` if it names an existing local path.

        Values carrying a URL scheme are never treated as local. A
        single-letter "scheme" is taken to be a Windows drive letter
        (``C:\\repo``), not a URL scheme, so such values are still probed
        on disk.
        """
        parsed = urlparse(value)
        if len(parsed.scheme) > 1:
            return None

        path = Path(value).expanduser()
        if path.exists():
            return path
        return None

    def _checkout_key(self, url: str) -> str:
        """Build the checkout directory name for ``url``.

        The name is the sanitized last path component (without a trailing
        ``.git``) followed by the first 12 hex digits of the URL's SHA-256,
        so distinct URLs that share a repository name do not collide.
        """
        parsed = urlparse(url)
        name = Path(parsed.path.rstrip("/")).name or "repository"
        if name.endswith(".git"):
            name = name[:-4]
        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:12]
        return f"{self._safe_name(name)}-{digest}"

    def _safe_name(self, value: str) -> str:
        """Replace every character other than alphanumerics, ``-`` and ``_``."""
        safe = "".join(char if char.isalnum() or char in "-_" else "-" for char in value)
        return safe.strip("-") or "repository"

    def _run_git(self, args: list[str], *, cwd: Path | None) -> None:
        """Run ``git <args>`` and raise ``RuntimeError`` on any failure.

        Args:
            args: Arguments passed to git, without the leading ``git``.
            cwd: Working directory for the command, or None to inherit.

        Raises:
            RuntimeError: If the git executable is not found, the command
                exceeds the 120 second timeout, or it exits non-zero.
        """
        if shutil.which("git") is None:
            raise RuntimeError("git executable was not found")
        try:
            result = subprocess.run(
                ["git", *args],
                cwd=cwd,
                check=False,
                capture_output=True,
                text=True,
                timeout=120,
            )
        except subprocess.TimeoutExpired as exc:
            # Report timeouts the same way as every other git failure so
            # callers see one exception type and a readable message.
            raise RuntimeError(
                f"git {' '.join(args)} timed out after {exc.timeout} seconds"
            ) from exc
        if result.returncode != 0:
            message = result.stderr.strip() or result.stdout.strip()
            raise RuntimeError(f"git {' '.join(args)} failed: {message}")
|
||||
@@ -7,11 +7,13 @@ from fastapi import Depends, FastAPI, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from repo_registry.core.service import RegistryService
|
||||
from repo_registry.repo_ingestion.git import GitIngestionService
|
||||
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
|
||||
|
||||
|
||||
class Settings(BaseModel):
    """Configuration values for the registry API."""

    # Location of the SQLite database file.
    # NOTE(review): presumably the path handed to RegistryStore; confirm in get_service.
    database_path: str = Field(default="var/repo-registry.sqlite3")
    # Directory passed to GitIngestionService as its checkout root.
    checkout_root: str = Field(default="var/checkouts")
|
||||
|
||||
|
||||
def get_settings() -> Settings:
|
||||
@@ -23,7 +25,7 @@ def get_service(settings: Settings = Depends(get_settings)) -> RegistryService:
|
||||
database_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
store = RegistryStore(database_path)
|
||||
store.initialize()
|
||||
return RegistryService(store)
|
||||
return RegistryService(store, ingestion=GitIngestionService(settings.checkout_root))
|
||||
|
||||
|
||||
class RepositoryCreate(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user