Implement HTTP ingestion and retention lifecycle

This commit is contained in:
2026-05-16 23:10:21 +02:00
parent 2173f702c1
commit c33baa3635
15 changed files with 2478 additions and 69 deletions

View File

@@ -10,9 +10,14 @@ from __future__ import annotations
import asyncio
import json
import mimetypes
import urllib.error
import urllib.request
import uuid
from pathlib import Path
from typing import Any
import click
import typer
from artifactstore import __version__
@@ -28,6 +33,8 @@ app = typer.Typer(
help="artifact-store: artifact registry and storage gateway",
no_args_is_help=True,
)
# Sub-application that holds the retention lifecycle commands; it is registered
# on the root ``app`` under the command name "retention".
retention_app = typer.Typer(help="Retention lifecycle commands", no_args_is_help=True)
app.add_typer(retention_app, name="retention")
@app.callback()
@@ -74,6 +81,107 @@ def health() -> None:
typer.echo(json.dumps(payload, indent=2))
@app.command()
def push(
    directory: Path = typer.Argument(
        ...,
        exists=True,
        file_okay=False,
        dir_okay=True,
        readable=True,
        help="Directory to push as one artifact package.",
    ),
    producer: str = typer.Option(..., "--producer", help="Producer slug for the package."),
    subject: str = typer.Option(..., "--subject", help="Subject identifier for the package."),
    retention_class: str = typer.Option(
        "raw-evidence",
        "--retention-class",
        help="Retention class id to apply.",
    ),
    name: str | None = typer.Option(None, "--name", help="Package name; defaults to dir name."),
    api_url: str | None = typer.Option(None, "--api-url", help="artifact-store base URL."),
    token: str | None = typer.Option(None, "--token", help="Bearer token for the HTTP API."),
) -> None:
    """Push a directory through the HTTP API and finalize the package."""
    # Flow: create the package, upload every regular file found under
    # ``directory`` (recursively, in sorted order), finalize the package, then
    # print a JSON summary with the package id, manifest digest and file count.
    # Raises click.ClickException when no bearer token is available or when no
    # package name can be derived; HTTP helper failures propagate as raised.
    settings = get_settings()
    # Explicit command-line options take precedence over configured settings.
    base_url = api_url or settings.api_url
    bearer = token or settings.api_token
    if not bearer:
        raise click.ClickException("provide --token or ARTIFACTSTORE_API_TOKEN")

    # ``Path(".").name`` is "" and ``Path("..").name`` is "..", neither of which
    # is a usable default; fall back to the resolved directory's real name.
    default_name = directory.name
    if default_name in {"", ".."}:
        default_name = directory.resolve().name
    package_name = name or default_name
    if not package_name:
        # Still empty, e.g. the filesystem root, which has no final component.
        raise click.ClickException("could not derive a package name; provide --name")

    files = sorted(path for path in directory.rglob("*") if path.is_file())
    package = _http_json(
        "POST",
        base_url,
        "/packages",
        bearer,
        {
            "name": package_name,
            "producer": producer,
            "subject": subject,
            "retention_class": retention_class,
            "metadata": {"source_directory": str(directory)},
        },
    )
    package_id = str(package["id"])

    for path in files:
        # The relative path is sent POSIX-style regardless of the host OS.
        rel_path = path.relative_to(directory).as_posix()
        media_type = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
        _http_multipart(
            base_url,
            f"/packages/{package_id}/files",
            bearer,
            fields={"relative_path": rel_path, "media_type": media_type},
            file_field="file",
            file_name=path.name,
            file_content_type=media_type,
            file_bytes=path.read_bytes(),
        )

    finalized = _http_json("POST", base_url, f"/packages/{package_id}/finalize", bearer, {})
    typer.echo(
        json.dumps(
            {
                "package_id": package_id,
                "manifest_digest": finalized.get("manifest_digest"),
                "files": len(files),
            },
            indent=2,
        )
    )
@app.command()
def manifest(
    package_id: str = typer.Argument(..., help="Package UUID."),
    api_url: str | None = typer.Option(None, "--api-url", help="artifact-store base URL."),
    token: str | None = typer.Option(None, "--token", help="Bearer token for the HTTP API."),
) -> None:
    """Print a package manifest JSON projection from the HTTP API."""
    # Command-line options override the values found in the loaded settings.
    config = get_settings()
    endpoint_root = api_url or config.api_url
    auth_token = token or config.api_token
    if not auth_token:
        raise click.ClickException("provide --token or ARTIFACTSTORE_API_TOKEN")

    # The response body is echoed as received, decoded as UTF-8 text.
    manifest_path = f"/packages/{package_id}/manifest.json"
    raw_manifest = _http_bytes(
        "GET",
        endpoint_root,
        manifest_path,
        auth_token,
        headers={"Accept": "application/json"},
    )
    typer.echo(raw_manifest.decode("utf-8"))
@retention_app.command("sweep")
def retention_sweep() -> None:
    """Run the deletion-eligibility sweeper once against the configured DB."""
    # Drive the async sweep to completion, then report the marked ids as JSON.
    package_ids = asyncio.run(_retention_sweep_async(get_settings()))
    report = {"marked_package_ids": package_ids, "marked_count": len(package_ids)}
    typer.echo(json.dumps(report, indent=2))
# ---- internals -------------------------------------------------------------
@@ -110,5 +218,121 @@ async def _health_async(settings: Settings) -> dict[str, Any]:
}
async def _retention_sweep_async(settings: Settings) -> list[str]:
    """Run one deletion-eligibility sweep and return the marked ids as strings.

    A registry is built from ``settings`` and is always disposed afterwards,
    whether or not the sweep itself succeeds.
    """
    from artifactstore.app import build_registry

    store: Registry = build_registry(settings)
    try:
        swept = await store.sweep_deletion_eligibility()
    finally:
        await store.dispose()
    # Stringify only after the registry has been disposed, as before.
    return list(map(str, swept))
def _http_json(
    method: str,
    base_url: str,
    path: str,
    token: str,
    payload: dict[str, Any],
) -> dict[str, Any]:
    """Send ``payload`` as a JSON request body and return the decoded JSON object.

    Args:
        method: HTTP method to use.
        base_url: Base URL of the API; joined with ``path`` by ``_http_bytes``.
        path: Request path, also used in error messages.
        token: Bearer token passed through to ``_http_bytes``.
        payload: Mapping serialized with ``json.dumps`` as the request body.

    Returns:
        The response body decoded as a JSON object.

    Raises:
        click.ClickException: If the response body is not valid JSON or its
            top-level value is not an object. Errors raised by
            ``_http_bytes`` propagate unchanged.
    """
    response = _http_bytes(
        method,
        base_url,
        path,
        token,
        body=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json", "Accept": "application/json"},
    )
    try:
        decoded = json.loads(response)
    except ValueError as exc:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses;
        # report them like every other HTTP failure instead of a raw traceback.
        raise click.ClickException(f"invalid JSON response from {path}: {exc}") from exc
    if not isinstance(decoded, dict):
        raise click.ClickException(f"expected JSON object from {path}")
    return decoded
def _http_multipart(
    base_url: str,
    path: str,
    token: str,
    *,
    fields: dict[str, str],
    file_field: str,
    file_name: str,
    file_content_type: str,
    file_bytes: bytes,
) -> dict[str, Any]:
    """POST a ``multipart/form-data`` body with text fields and one file part.

    Args:
        base_url: Base URL of the API; joined with ``path`` by ``_http_bytes``.
        path: Request path, also used in error messages.
        token: Bearer token passed through to ``_http_bytes``.
        fields: Plain form fields, emitted in iteration order before the file.
        file_field: Form field name of the file part.
        file_name: File name advertised in the file part's disposition header.
        file_content_type: Value of the file part's ``Content-Type`` header.
        file_bytes: Raw content of the file part.

    Returns:
        The response body decoded as a JSON object.

    Raises:
        click.ClickException: If the response body is not valid JSON or its
            top-level value is not an object. Errors raised by
            ``_http_bytes`` propagate unchanged.
    """
    # A random boundary per request keeps the delimiter unpredictable.
    boundary = f"artifactstore-{uuid.uuid4().hex}"
    delimiter = f"--{boundary}\r\n".encode("ascii")

    # Collect the pieces and join once, so the file content is copied only once.
    chunks: list[bytes] = []
    for name, value in fields.items():
        chunks.append(delimiter)
        chunks.append(
            f'Content-Disposition: form-data; name="{_quote_header_value(name)}"\r\n\r\n'.encode()
        )
        chunks.append(value.encode())
        chunks.append(b"\r\n")
    chunks.append(delimiter)
    chunks.append(
        (
            f'Content-Disposition: form-data; name="{_quote_header_value(file_field)}"; '
            f'filename="{_quote_header_value(file_name)}"\r\n'
            f"Content-Type: {file_content_type}\r\n\r\n"
        ).encode()
    )
    chunks.append(file_bytes)
    chunks.append(b"\r\n")
    # The closing delimiter carries a trailing "--".
    chunks.append(f"--{boundary}--\r\n".encode("ascii"))

    response = _http_bytes(
        "POST",
        base_url,
        path,
        token,
        body=b"".join(chunks),
        headers={
            "Content-Type": f"multipart/form-data; boundary={boundary}",
            "Accept": "application/json",
        },
    )
    try:
        decoded = json.loads(response)
    except ValueError as exc:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses;
        # report them like every other HTTP failure instead of a raw traceback.
        raise click.ClickException(f"invalid JSON response from {path}: {exc}") from exc
    if not isinstance(decoded, dict):
        raise click.ClickException(f"expected JSON object from {path}")
    return decoded
def _http_bytes(
method: str,
base_url: str,
path: str,
token: str,
*,
body: bytes | None = None,
headers: dict[str, str] | None = None,
) -> bytes:
url = f"{base_url.rstrip('/')}/{path.lstrip('/')}"
effective_headers = dict(headers or {})
effective_headers["Authorization"] = f"Bearer {token}"
request = urllib.request.Request(
url,
data=body,
headers=effective_headers,
method=method,
)
try:
with urllib.request.urlopen(request, timeout=60) as response:
data = response.read()
if isinstance(data, bytes):
return data
return bytes(data)
except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace")
raise click.ClickException(f"HTTP {exc.code} from {path}: {detail}") from exc
except urllib.error.URLError as exc:
raise click.ClickException(f"could not reach {url}: {exc.reason}") from exc
def _quote_header_value(value: str) -> str:
return value.replace("\\", "\\\\").replace('"', '\\"')
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__": # pragma: no cover
    app()