diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py index a2c47d8..5c0ab8c 100644 --- a/src/repo_registry/core/service.py +++ b/src/repo_registry/core/service.py @@ -62,9 +62,16 @@ class RegistryService: name: str | None = None, description: str | None = None, branch: str = "main", + access_username: str | None = None, + access_password: str | None = None, ) -> Repository: if name is None or description is None: - checkout = self.ingestion.resolve(url, branch=branch) + checkout = self.ingestion.resolve( + url, + branch=branch, + access_username=access_username, + access_password=access_password, + ) metadata = self.metadata_extractor.extract(checkout.source_path, url) else: metadata = None diff --git a/src/repo_registry/repo_ingestion/git.py b/src/repo_registry/repo_ingestion/git.py index b327515..d7b7fd5 100644 --- a/src/repo_registry/repo_ingestion/git.py +++ b/src/repo_registry/repo_ingestion/git.py @@ -3,8 +3,10 @@ from __future__ import annotations import hashlib import shutil import subprocess +import os from dataclasses import dataclass from pathlib import Path +from base64 import b64encode from urllib.parse import urlparse @@ -18,7 +20,14 @@ class GitIngestionService: def __init__(self, checkout_root: str | Path = "var/checkouts") -> None: self.checkout_root = Path(checkout_root) - def resolve(self, url_or_path: str, *, branch: str = "main") -> Checkout: + def resolve( + self, + url_or_path: str, + *, + branch: str = "main", + access_username: str | None = None, + access_password: str | None = None, + ) -> Checkout: local_path = self._local_path(url_or_path) if local_path is not None: return Checkout(source_path=local_path.resolve(), was_cloned=False) @@ -26,17 +35,49 @@ class GitIngestionService: checkout_path = self.checkout_root / self._checkout_key(url_or_path) self.checkout_root.mkdir(parents=True, exist_ok=True) if checkout_path.exists(): - self._run_git(["fetch", "--all", "--prune"], cwd=checkout_path) + self._run_git( + ["fetch", "--all", "--prune"], + cwd=checkout_path, + access_username=access_username, + access_password=access_password, + ) else: - self._run_git(["clone", url_or_path, str(checkout_path)], cwd=None) + self._run_git( + ["clone", url_or_path, str(checkout_path)], + cwd=None, + access_username=access_username, + access_password=access_password, + ) - self._checkout_branch(checkout_path, branch) + self._checkout_branch( + checkout_path, + branch, + access_username=access_username, + access_password=access_password, + ) return Checkout(source_path=checkout_path.resolve(), was_cloned=True) - def _checkout_branch(self, checkout_path: Path, branch: str) -> None: + def _checkout_branch( + self, + checkout_path: Path, + branch: str, + *, + access_username: str | None = None, + access_password: str | None = None, + ) -> None: if branch: - self._run_git(["checkout", branch], cwd=checkout_path) - self._run_git(["pull", "--ff-only"], cwd=checkout_path) + self._run_git( + ["checkout", branch], + cwd=checkout_path, + access_username=access_username, + access_password=access_password, + ) + self._run_git( + ["pull", "--ff-only"], + cwd=checkout_path, + access_username=access_username, + access_password=access_password, + ) def _local_path(self, value: str) -> Path | None: parsed = urlparse(value) @@ -60,10 +101,23 @@ class GitIngestionService: safe = "".join(char if char.isalnum() or char in "-_" else "-" for char in value) return safe.strip("-") or "repository" - def _run_git(self, args: list[str], *, cwd: Path | None) -> None: + def _run_git( + self, + args: list[str], + *, + cwd: Path | None, + access_username: str | None = None, + access_password: str | None = None, + ) -> None: if shutil.which("git") is None: raise RuntimeError("git executable was not found") - command = ["git", *args] + auth_config = self._auth_config(access_username, access_password) + command = ["git", *auth_config, *args] + env = { + **os.environ, + "GIT_TERMINAL_PROMPT": "0", + "GIT_ASKPASS": "echo", + } try: result = subprocess.run( command, @@ -72,11 +126,46 @@ class GitIngestionService: capture_output=True, text=True, timeout=120, + env=env, ) except subprocess.TimeoutExpired as exc: raise RuntimeError( - f"git {' '.join(args)} timed out after {exc.timeout} seconds" + f"git {' '.join(args)} timed out after {exc.timeout} seconds. " + "If this is a private repository, provide HTTP access credentials." ) from exc if result.returncode != 0: message = result.stderr.strip() or result.stdout.strip() + if self._looks_like_auth_failure(message): + raise RuntimeError( + f"git {' '.join(args)} failed: authentication required. " + "Provide a username and password or access token for this repository." + ) raise RuntimeError(f"git {' '.join(args)} failed: {message}") + + def _auth_config( + self, + access_username: str | None, + access_password: str | None, + ) -> list[str]: + if not access_username or not access_password: + return [] + token = b64encode( + f"{access_username}:{access_password}".encode("utf-8") + ).decode("ascii") + return ["-c", f"http.extraHeader=Authorization: Basic {token}"] + + def _looks_like_auth_failure(self, message: str) -> bool: + lowered = message.lower() + return any( + phrase in lowered + for phrase in ( + "authentication failed", + "could not read username", + "could not read password", + "terminal prompts disabled", + "authentication required", + "access denied", + "401", + "403", + ) + ) diff --git a/src/repo_registry/web_api/schemas.py b/src/repo_registry/web_api/schemas.py index b665750..6d8e143 100644 --- a/src/repo_registry/web_api/schemas.py +++ b/src/repo_registry/web_api/schemas.py @@ -52,6 +52,8 @@ class RepositoryCreate(BaseModel): name: str | None = None description: str | None = None branch: str = "main" + access_username: str | None = None + access_password: str | None = Field(default=None, repr=False) model_config = { "json_schema_extra": { @@ -61,6 +63,8 @@ class RepositoryCreate(BaseModel): "name": "Example Repository", "description": "Optional human-readable repository summary.", "branch": "main", + "access_username": None, + "access_password": None, } ] } diff --git a/src/repo_registry/web_ui/views.py b/src/repo_registry/web_ui/views.py index b8bbc7f..6449aad 100644 --- a/src/repo_registry/web_ui/views.py +++ b/src/repo_registry/web_ui/views.py @@ -201,6 +201,8 @@ def render_repository_index(
+ +
Registering repository... @@ -410,12 +412,16 @@ def search_page( def create_repository_from_form( url: str = Form(...), branch: str = Form("main"), + access_username: str = Form(""), + access_password: str = Form(""), service: RegistryService = Depends(get_service), ): try: repository = service.register_repository( url=url, branch=branch or "main", + access_username=access_username or None, + access_password=access_password or None, ) except (RuntimeError, ValueError) as exc: return render_repository_index( diff --git a/tests/test_git_ingestion.py b/tests/test_git_ingestion.py index ebb19c5..fc7e3f2 100644 --- a/tests/test_git_ingestion.py +++ b/tests/test_git_ingestion.py @@ -45,3 +45,38 @@ def test_ingestion_clones_file_url(tmp_path): text=True, ).stdout.strip() assert branch == "main" + + +def test_git_commands_fail_fast_and_accept_ephemeral_http_credentials(monkeypatch): + calls = [] + + def fake_run(command, **kwargs): + calls.append((command, kwargs)) + return subprocess.CompletedProcess( + command, + 128, + stdout="", + stderr="fatal: could not read Username for 'https://example.com': terminal prompts disabled", + ) + + monkeypatch.setattr(subprocess, "run", fake_run) + + service = GitIngestionService() + + try: + service._run_git( + ["clone", "https://example.com/private.git", "/tmp/private"], + cwd=None, + access_username="user", + access_password="secret", + ) + except RuntimeError as exc: + message = str(exc) + else: + raise AssertionError("expected authentication failure") + + command, kwargs = calls[0] + assert command[:3] == ["git", "-c", "http.extraHeader=Authorization: Basic dXNlcjpzZWNyZXQ="] + assert kwargs["env"]["GIT_TERMINAL_PROMPT"] == "0" + assert "authentication required" in message + assert "secret" not in message diff --git a/tests/test_web_api.py b/tests/test_web_api.py index ec7a59c..ac1a2d9 100644 --- a/tests/test_web_api.py +++ b/tests/test_web_api.py @@ -1074,12 +1074,15 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): assert index_response.status_code == 200 assert "Register Repository" in index_response.text assert "Registering repository..." in index_response.text + assert "Password or access token" in index_response.text create_response = client.post( "/ui/repos", data={ "url": str(source), "branch": "main", + "access_username": "", + "access_password": "", }, follow_redirects=False, )