generated from coulomb/repo-seed
feat(repos): git-fingerprint-based machine-independent repo identity
Add git_fingerprint (root commit SHA-1) to managed_repos as a stable,
machine-independent identifier — identical across every clone regardless
of checkout path, remote URL, or SSH alias.
- Migration n1i2j3k4l5m6: adds git_fingerprint column + non-unique index
(non-unique to support repos that share ancestry via forks/splits)
- GET /repos/by-fingerprint?hash=<sha>[&remote_url=<url>]: lookup by
fingerprint; optional remote_url disambiguates shared-ancestry repos
- GET /repos/by-remote?url=<url>: fallback lookup by remote URL
- consistency_check.py --here [PATH]: auto-detects the repo slug from any
local checkout via its fingerprint (falling back to the remote URL), then
auto-registers host_paths[hostname] so subsequent runs need no override
- --all now includes repos with host_paths[current_hostname], not just
those with local_path
- fix-consistency-here / check-consistency-here Makefile targets
- Fixed _api_get bug: httpx strips query strings when params={} is passed
- Backfilled fingerprints for 14 repos on this host
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -27,6 +27,7 @@ class ManagedRepo(Base, TimestampMixin):
|
||||
topic_id: Mapped[uuid.UUID | None] = mapped_column(
|
||||
UUID(as_uuid=True), ForeignKey("topics.id", ondelete="SET NULL"), nullable=True
|
||||
)
|
||||
git_fingerprint: Mapped[str | None] = mapped_column(String(40), nullable=True, index=True)
|
||||
sbom_source: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
last_sbom_at: Mapped[datetime | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True
|
||||
|
||||
@@ -65,6 +65,7 @@ async def register_repo(
|
||||
name=body.name,
|
||||
local_path=body.local_path,
|
||||
remote_url=body.remote_url,
|
||||
git_fingerprint=body.git_fingerprint,
|
||||
description=body.description,
|
||||
topic_id=body.topic_id,
|
||||
)
|
||||
@@ -74,6 +75,43 @@ async def register_repo(
|
||||
return repo
|
||||
|
||||
|
||||
@router.get("/by-fingerprint", response_model=list[RepoRead])
async def get_repo_by_fingerprint(
    hash: str,
    remote_url: str | None = None,
    session: AsyncSession = Depends(get_session),
) -> list[ManagedRepo]:
    """Return every managed repo whose git fingerprint equals ``hash``.

    The fingerprint is the root-commit SHA-1, i.e. what
    ``git rev-list --max-parents=0 HEAD`` prints. Every clone of a repository
    reports the same value, and so do repositories that share history
    (forks, monorepo splits) — hence a list is returned, not a single row.

    Supply ``remote_url`` to keep only the repos registered with that exact
    remote; this disambiguates repos that descend from the same root commit.
    An empty ``remote_url`` is treated the same as omitting it.

    The result is an empty list when nothing matches.
    """
    # Gather the predicates first; Select.where() ANDs multiple criteria.
    criteria = [ManagedRepo.git_fingerprint == hash]
    if remote_url:
        criteria.append(ManagedRepo.remote_url == remote_url)

    rows = await session.execute(select(ManagedRepo).where(*criteria))
    return list(rows.scalars().all())
|
||||
|
||||
|
||||
@router.get("/by-remote", response_model=RepoRead)
async def get_repo_by_remote_url(
    url: str,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Fetch the managed repo registered with git remote ``url``.

    This is the fallback lookup; ``/by-fingerprint`` is the preferred route.

    Raises:
        HTTPException: 404 when no repo has this ``remote_url``.
    """
    stmt = select(ManagedRepo).where(ManagedRepo.remote_url == url)
    # NOTE(review): scalar_one_or_none() raises if more than one row matches;
    # confirm remote_url is unique across managed repos.
    found = (await session.execute(stmt)).scalar_one_or_none()
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail=f"No repo with remote_url '{url}' found")
|
||||
|
||||
|
||||
@router.get("/doi/summary", response_model=list[DoISummaryEntry])
|
||||
async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoISummaryEntry]:
|
||||
"""Return DoI tier for all active repos, worst tier first.
|
||||
|
||||
@@ -11,6 +11,7 @@ class RepoCreate(BaseModel):
|
||||
name: str
|
||||
local_path: str | None = None
|
||||
remote_url: str | None = None
|
||||
git_fingerprint: str | None = None
|
||||
description: str | None = None
|
||||
topic_id: uuid.UUID | None = None
|
||||
|
||||
@@ -19,6 +20,7 @@ class RepoUpdate(BaseModel):
|
||||
name: str | None = None
|
||||
local_path: str | None = None
|
||||
remote_url: str | None = None
|
||||
git_fingerprint: str | None = None
|
||||
description: str | None = None
|
||||
topic_id: uuid.UUID | None = None
|
||||
last_state_synced_at: datetime | None = None
|
||||
@@ -40,6 +42,7 @@ class RepoRead(BaseModel):
|
||||
local_path: str | None = None
|
||||
host_paths: dict = {}
|
||||
remote_url: str | None = None
|
||||
git_fingerprint: str | None = None
|
||||
description: str | None = None
|
||||
status: str
|
||||
topic_id: uuid.UUID | None = None
|
||||
|
||||
Reference in New Issue
Block a user