WP-0001-T002: registry data model, Alembic, initial migration with retention seed

Schema (src/artifactstore/db/schema.py):
- events table (ADR-0002 source of truth): sequence BIGSERIAL PK, created_at,
  event_type, subject_kind, subject_id, actor, payload (CBOR bytes),
  payload_digest. Indexes on (subject_kind, subject_id) and
  (event_type, sequence).
- artifact_packages, artifact_files, storage_locations, retention_state
  (materialised views over events).
- retention_classes (seed table) and metadata_schemas (config table).
- ADR-0001 columns present: digest_algorithm, digest_primary, digest_sha256,
  content_address. Blueprint tiering columns present: retrieval_tier
  (default 'hot'), restore_status.
- Types portable: SQLAlchemy 2.0 Core with JSON().with_variant(JSONB, 'postgresql'),
  Uuid, LargeBinary, DateTime(timezone=True), Boolean false() default.

Seed (src/artifactstore/db/seed.py): five v1 retention classes (transient,
raw-evidence, summary-evidence, release-evidence, permanent-record) with
default durations in seconds; permanent-record has no expiry.

Alembic:
- alembic.ini with sync sqlite URL default; path_separator=os to silence the
  1.13 deprecation warning.
- migrations/env.py: translates async URLs (+aiosqlite, +asyncpg) to sync
  counterparts at migrate-time so a single ARTIFACTSTORE_DATABASE_URL works
  for both runtime (async) and Alembic (sync).
- migrations/script.py.mako template.
- migrations/versions/20260516_0001_initial.py: metadata.create_all + bulk
  insert of retention class seeds.

Make:
- make migrate: alembic upgrade head (ensures var/ exists).
- make migrate-fresh: delete the local SQLite database file, then re-run the
  migrations from scratch.

Deps: psycopg[binary] added as optional `postgres` extra (PostgreSQL prod
path; SQLite default for dev needs no extra).

Tests:
- tests/unit/test_db_schema.py: every expected table present; ADR-0001 and
  tiering columns present; seed has the five v1 classes; permanent-record
  has no default_duration; create_all + FK insert + Boolean default
  round-trip on in-memory SQLite.
- tests/integration/test_migrations.py: alembic upgrade head against a
  tempfile SQLite produces all tables (+ alembic_version) and the seed rows.

Gates: ruff clean, mypy --strict clean on 32 files, 38 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-16 01:50:38 +02:00
parent d14ee517d9
commit f8097cb683
14 changed files with 749 additions and 15 deletions

73
migrations/env.py Normal file
View File

@@ -0,0 +1,73 @@
"""Alembic environment configuration.
The migration runner is sync; the runtime service is async. To support a
single configured ``ARTIFACTSTORE_DATABASE_URL``, this module rewrites
async driver URLs (``+aiosqlite``, ``+asyncpg``) to their sync counterparts
when invoking Alembic.
"""
from __future__ import annotations
import sys
from logging.config import fileConfig
from pathlib import Path
from alembic import context
from sqlalchemy import engine_from_config, pool
# The package lives under ``<repo>/src``; put that directory at the front of
# ``sys.path`` so the ``artifactstore`` imports below resolve when Alembic
# executes this file directly.
_ROOT = Path(__file__).resolve().parent.parent
_SRC = _ROOT.joinpath("src")
if not any(entry == str(_SRC) for entry in sys.path):
    sys.path.insert(0, str(_SRC))

# These imports must come after the ``sys.path`` adjustment above, hence E402.
from artifactstore.config import get_settings  # noqa: E402
from artifactstore.db.schema import metadata as target_metadata  # noqa: E402

# Alembic's Config object for the current invocation.
config = context.config

# Configure Python logging from the ini file, when one was supplied.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)
def _sync_url(url: str) -> str:
"""Translate an async driver URL to its sync counterpart for Alembic."""
if "+aiosqlite" in url:
return url.replace("+aiosqlite", "")
if "+asyncpg" in url:
return url.replace("+asyncpg", "+psycopg")
return url
# Load the application settings and set Alembic's ``sqlalchemy.url`` main
# option to the sync-driver form of the configured database URL, so both the
# offline and online runners below read the same value.
_settings = get_settings()
config.set_main_option("sqlalchemy.url", _sync_url(_settings.database_url))
def run_migrations_offline() -> None:
    """Run migrations in offline mode, configured from a URL only.

    The context is set up with the ``sqlalchemy.url`` main option instead of
    a connection, with literal binds enabled and the ``named`` paramstyle.
    """
    database_url = config.get_main_option("sqlalchemy.url")
    context.configure(
        dialect_opts={"paramstyle": "named"},
        literal_binds=True,
        target_metadata=target_metadata,
        url=database_url,
    )

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in online mode over a real database connection.

    An engine is built from the ``sqlalchemy.``-prefixed keys of the main ini
    section (falling back to an empty mapping when the section is missing),
    using ``NullPool``; migrations then run inside a transaction on a single
    connection obtained from that engine.
    """
    ini_section = config.get_section(config.config_ini_section) or {}
    engine = engine_from_config(
        ini_section,
        poolclass=pool.NullPool,
        prefix="sqlalchemy.",
    )

    with engine.connect() as connection:
        context.configure(target_metadata=target_metadata, connection=connection)
        with context.begin_transaction():
            context.run_migrations()
# Alembic executes this module; pick the runner matching the mode reported by
# the migration context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

27
migrations/script.py.mako Normal file
View File

@@ -0,0 +1,27 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
${imports if imports else ""}
revision: str = ${repr(up_revision)}
down_revision: str | None = ${repr(down_revision)}
branch_labels: str | Sequence[str] | None = ${repr(branch_labels)}
depends_on: str | Sequence[str] | None = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,39 @@
"""initial schema (events + materialised views + retention seed).
Revision ID: 0001_initial
Revises:
Create Date: 2026-05-16
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from artifactstore.db.schema import metadata
from artifactstore.db.seed import RETENTION_CLASS_SEEDS
# Revision identifiers read by Alembic. ``down_revision`` is ``None`` because
# this is the first revision in the chain (see "Revises:" in the docstring).
revision: str = "0001_initial"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create every table in the schema metadata and seed retention classes.

    NOTE(review): ``create_all`` works from the ``metadata`` object imported
    from ``artifactstore.db.schema`` at the time the migration runs, so the
    tables produced follow the current schema module rather than a frozen
    snapshot of it -- confirm this is intended for later revisions.
    """
    metadata.create_all(bind=op.get_bind())

    # Lightweight table construct naming only the columns the seed rows are
    # inserted through.
    seed_columns = (
        sa.column("class_id", sa.String),
        sa.column("default_duration_seconds", sa.BigInteger),
        sa.column("deletion_strategy", sa.String),
    )
    retention_classes = sa.table("retention_classes", *seed_columns)

    seed_rows = [dict(seed) for seed in RETENTION_CLASS_SEEDS]
    op.bulk_insert(retention_classes, seed_rows)
def downgrade() -> None:
    """Drop every table registered on the schema metadata."""
    metadata.drop_all(bind=op.get_bind())