generated from coulomb/repo-seed
Add state-hub v0.1 — local-first state service for the Custodian
Implements the first live layer of the Custodian cognitive infrastructure: PostgreSQL schema, FastAPI REST API, FastMCP stdio server, and Observable Framework telemetry dashboard.
- state-hub/: full stack (docker-compose, FastAPI, Alembic, MCP server, dashboard)
- 5 DB tables: topics, workstreams, tasks, decisions, progress_events
- 11 MCP tools + 5 resources registered in .mcp.json
- Observable dashboard: Overview, Workstreams, Decisions, Progress pages
- CLAUDE.md: session protocol (get_state_summary / add_progress_event ritual)
- ~/.claude/CLAUDE.md: global cross-project reference to the hub
- scripts/pull_image.py: WSL2 TLS-resilient Docker image downloader
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
185
scripts/pull_image.py
Normal file
185
scripts/pull_image.py
Normal file
@@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pull a Docker Hub image via the registry v2 API using Python's SSL (OpenSSL),
|
||||
then import it via `docker load`. Bypasses Docker's Go TLS client entirely.
|
||||
|
||||
Usage: python pull_image.py <image:tag> [output.tar]
|
||||
e.g: python pull_image.py postgres:16-alpine postgres.tar
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
import tempfile
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import hashlib
|
||||
|
||||
|
||||
def get_token(repo: str) -> str:
    """Fetch a pull-scoped bearer token for *repo* from auth.docker.io.

    No credentials are sent with the request. The JSON response is parsed
    and the value of its ``token`` field is returned.
    """
    scope = f"repository:{repo}:pull"
    endpoint = "https://auth.docker.io/token?service=registry.docker.io&scope=" + scope
    with urllib.request.urlopen(endpoint, timeout=30) as response:
        payload = json.loads(response.read())
    return payload["token"]
|
||||
|
||||
|
||||
class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler):
    """Follow redirects but strip Authorization; keep Range and other headers.

    Blob downloads are redirected away from the registry host, so the
    registry bearer token must not be replayed to the redirect target.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Build the follow-up request for a redirect, minus ``Authorization``.

        The standard handler does the heavy lifting: it rejects redirect
        code / method combinations that must not be followed automatically
        (raising ``urllib.error.HTTPError``), copies the original headers
        (so ``Range`` survives), and marks the new request as unverifiable.
        This override only removes the ``Authorization`` header afterwards.

        Returns the new ``Request``, or ``None`` if the base handler
        declines to handle the redirect.
        """
        new_req = super().redirect_request(req, fp, code, msg, headers, newurl)
        if new_req is not None:
            # remove_header clears both regular and unredirected header sets.
            new_req.remove_header("Authorization")
        return new_req
|
||||
|
||||
|
||||
def _opener():
    """Return a fresh urllib opener that handles redirects via _StripAuthOnRedirect."""
    redirect_handler = _StripAuthOnRedirect()
    return urllib.request.build_opener(redirect_handler)
|
||||
|
||||
|
||||
def registry_get(url: str, token: str, headers: dict | None = None) -> bytes:
    """GET *url* with a Bearer token and return the raw response body.

    Any *headers* supplied are merged on top of the Authorization header.
    Redirects are followed through the opener from ``_opener()``, i.e.
    without the Authorization header (for S3/CDN blobs).
    """
    request_headers = {"Authorization": f"Bearer {token}"}
    if headers:
        request_headers.update(headers)
    request = urllib.request.Request(url, headers=request_headers)
    opener = _opener()
    with opener.open(request, timeout=60) as response:
        body = response.read()
    return body
|
||||
|
||||
|
||||
def pull_image(image: str, output_tar: str) -> None:
    """Download a Docker Hub image and write it as a ``docker load`` tarball.

    Steps: obtain a pull token, fetch the image manifest (resolving a
    multi-arch manifest list / OCI index to linux/amd64), download the config
    blob and every layer blob, verify their sha256 digests, then bundle
    everything together with a ``manifest.json`` into *output_tar*.

    Args:
        image: ``name[:tag]`` reference; the tag defaults to ``latest`` and a
            name without ``/`` is looked up under ``library/``.
        output_tar: path of the tar archive to create.

    Raises:
        RuntimeError: no linux/amd64 manifest is available, the manifest has
            no ``config``/``layers``, a downloaded blob does not match its
            sha256 digest, or a chunk keeps failing after repeated retries.
    """
    repo_name, tag = _split_image_ref(image)

    print(f"Authenticating for {repo_name}:{tag} ...")
    token = get_token(repo_name)

    # Fetch manifest (image manifest or multi-arch index, OCI or Docker v2)
    print("Fetching manifest ...")
    registry_base = f"https://registry-1.docker.io/v2/{repo_name}"
    manifest = json.loads(
        registry_get(
            f"{registry_base}/manifests/{tag}",
            token,
            headers={"Accept": f"{_IMAGE_MANIFEST_TYPES},{_MANIFEST_INDEX_TYPES}"},
        )
    )
    manifest = _resolve_platform_manifest(manifest, registry_base, token)
    if "config" not in manifest or "layers" not in manifest:
        raise RuntimeError(
            f"Unsupported manifest for {repo_name}:{tag}: missing 'config' or 'layers'"
        )

    config_digest = manifest["config"]["digest"]
    layers = manifest["layers"]

    with tempfile.TemporaryDirectory() as tmpdir:
        # Download config blob
        print("Downloading config ...")
        config_data = registry_get(f"{registry_base}/blobs/{config_digest}", token)
        _check_sha256(config_digest, hashlib.sha256(config_data).hexdigest(), "config blob")
        config_filename = _digest_hex(config_digest) + ".json"
        with open(os.path.join(tmpdir, config_filename), "wb") as f:
            f.write(config_data)

        # Download each layer into its own layer_<i>/ directory
        layer_dir_names = []
        for i, layer in enumerate(layers):
            digest = layer["digest"]
            size = layer["size"]
            layer_id = _digest_hex(digest)
            print(f"Downloading layer {i+1}/{len(layers)} ({layer_id[:12]}..., {size//1024//1024}MB) ...")

            dir_name = f"layer_{i}"
            layer_dir = os.path.join(tmpdir, dir_name)
            os.makedirs(layer_dir)

            # The helper may have re-authenticated; keep using its token.
            actual_hex, token = _download_blob(
                f"{registry_base}/blobs/{digest}",
                repo_name,
                token,
                size,
                os.path.join(layer_dir, "layer.tar"),
            )
            _check_sha256(digest, actual_hex, f"layer {i+1}/{len(layers)}")

            with open(os.path.join(layer_dir, "VERSION"), "w") as f:
                f.write("1.0")
            with open(os.path.join(layer_dir, "json"), "w") as f:
                json.dump({"id": layer_id}, f)

            layer_dir_names.append(dir_name)

        # Write manifest.json
        manifest_json = [
            {
                "Config": config_filename,
                # Only strip the implicit namespace when it is a real prefix.
                "RepoTags": [f"{repo_name.removeprefix('library/')}:{tag}"],
                "Layers": [f"{dir_name}/layer.tar" for dir_name in layer_dir_names],
            }
        ]
        with open(os.path.join(tmpdir, "manifest.json"), "w") as f:
            json.dump(manifest_json, f)

        # Bundle into tar
        print(f"Building {output_tar} ...")
        with tarfile.open(output_tar, "w") as tar:
            for name in (config_filename, "manifest.json"):
                tar.add(os.path.join(tmpdir, name), arcname=name)
            for dir_name in layer_dir_names:
                for fname in ("layer.tar", "VERSION", "json"):
                    tar.add(os.path.join(tmpdir, dir_name, fname), arcname=f"{dir_name}/{fname}")

    print(f"Done. Load with: docker load -i {output_tar}")


# Accept values sent to the registry: single-image manifests and multi-arch indexes.
_IMAGE_MANIFEST_TYPES = (
    "application/vnd.docker.distribution.manifest.v2+json,"
    "application/vnd.oci.image.manifest.v1+json"
)
_MANIFEST_INDEX_TYPES = (
    "application/vnd.docker.distribution.manifest.list.v2+json,"
    "application/vnd.oci.image.index.v1+json"
)


def _split_image_ref(image: str) -> tuple[str, str]:
    """Split ``name[:tag]`` into (repository path, tag) for Docker Hub."""
    if ":" in image:
        repo_name, tag = image.rsplit(":", 1)
    else:
        repo_name, tag = image, "latest"
    if "/" not in repo_name:
        repo_name = f"library/{repo_name}"
    return repo_name, tag


def _digest_hex(digest: str) -> str:
    """Return the part of an ``algorithm:hex`` digest after the first colon."""
    return digest.split(":", 1)[-1]


def _check_sha256(digest: str, actual_hex: str, what: str) -> None:
    """Raise RuntimeError when a ``sha256:`` *digest* differs from *actual_hex*."""
    # Digests using another algorithm are not checked.
    if digest.startswith("sha256:") and _digest_hex(digest) != actual_hex:
        raise RuntimeError(
            f"Digest mismatch for {what}: expected {digest}, got sha256:{actual_hex}"
        )


def _resolve_platform_manifest(manifest: dict, registry_base: str, token: str) -> dict:
    """Return *manifest* itself, or the linux/amd64 manifest it points to."""
    if "manifests" not in manifest:
        return manifest

    print("Manifest list detected — selecting linux/amd64 ...")
    for entry in manifest["manifests"]:
        plat = entry.get("platform", {})
        if plat.get("os") == "linux" and plat.get("architecture") == "amd64":
            manifest_bytes = registry_get(
                f"{registry_base}/manifests/{entry['digest']}",
                token,
                headers={"Accept": _IMAGE_MANIFEST_TYPES},
            )
            return json.loads(manifest_bytes)
    raise RuntimeError("Manifest list has no linux/amd64 entry")


def _download_blob(
    blob_url: str,
    repo_name: str,
    token: str,
    size: int,
    dest_path: str,
    chunk_size: int = 2 * 1024 * 1024,
    max_failures: int = 30,
) -> tuple[str, str]:
    """Stream a blob to *dest_path* using Range requests of *chunk_size* bytes.

    Chunking means a broken connection only loses one chunk, not the whole
    download. Each chunk is retried up to *max_failures* consecutive times;
    after an HTTP 401 a new token is requested before the next attempt.

    Returns:
        (sha256 hex digest of the bytes written, the token last in use).

    Raises:
        RuntimeError: a chunk failed *max_failures* times in a row.
    """
    import time  # not part of the file-level imports

    hasher = hashlib.sha256()
    downloaded = 0
    with open(dest_path, "wb") as out:
        while downloaded < size:
            end = min(downloaded + chunk_size - 1, size - 1)
            failures = 0
            need_new_token = False
            while True:
                try:
                    if need_new_token:
                        token = get_token(repo_name)
                        need_new_token = False
                    range_req = urllib.request.Request(
                        blob_url,
                        headers={
                            "Authorization": f"Bearer {token}",
                            "Range": f"bytes={downloaded}-{end}",
                        },
                    )
                    with _opener().open(range_req, timeout=60) as resp:
                        data = resp.read()
                    if not data:
                        # An empty body would never advance `downloaded`.
                        raise OSError("empty response body")
                    break
                except Exception as exc:
                    # Broad on purpose: TLS, socket and HTTP errors come from
                    # several hierarchies. The failure cap keeps this finite.
                    failures += 1
                    if failures >= max_failures:
                        raise RuntimeError(
                            f"Giving up on {blob_url} at byte {downloaded} "
                            f"after {failures} failed attempts"
                        ) from exc
                    if isinstance(exc, urllib.error.HTTPError) and exc.code == 401:
                        need_new_token = True
                    print(f"\r  retry at {downloaded//1024//1024}MB ({exc})...", end="", flush=True)
                    time.sleep(1)
            out.write(data)
            hasher.update(data)
            downloaded += len(data)
            pct = downloaded * 100 // size if size else 0
            print(f"\r  {downloaded//1024//1024}MB / {size//1024//1024}MB ({pct}%)", end="", flush=True)
    print()
    return hasher.hexdigest(), token
|
||||
|
||||
|
||||
# Script entry point: pull_image.py <image:tag> [output.tar]
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: pull_image.py <image:tag> [output.tar]")
        sys.exit(1)
    image = cli_args[0]
    if len(cli_args) > 1:
        output = cli_args[1]
    else:
        # Derive a file name from the image reference, e.g. a_b_tag.tar
        output = image.replace(":", "_").replace("/", "_") + ".tar"
    pull_image(image, output)
|
||||
Reference in New Issue
Block a user