generated from coulomb/repo-seed
WP-0001-T002: registry data model, Alembic, initial migration with retention seed
Schema (src/artifactstore/db/schema.py): - events table (ADR-0002 source of truth): sequence BIGSERIAL PK, created_at, event_type, subject_kind, subject_id, actor, payload (CBOR bytes), payload_digest. Indexes on (subject_kind, subject_id) and (event_type, sequence). - artifact_packages, artifact_files, storage_locations, retention_state (materialised views over events). - retention_classes (seed table) and metadata_schemas (config table). - ADR-0001 columns present: digest_algorithm, digest_primary, digest_sha256, content_address. Blueprint tiering columns present: retrieval_tier (default 'hot'), restore_status. - Types portable: SQLAlchemy 2.0 Core with JSON().with_variant(JSONB, 'postgresql'), Uuid, LargeBinary, DateTime(timezone=True), Boolean false() default. Seed (src/artifactstore/db/seed.py): five v1 retention classes (transient, raw-evidence, summary-evidence, release-evidence, permanent-record) with default durations in seconds; permanent-record has no expiry. Alembic: - alembic.ini with sync sqlite URL default; path_separator=os to silence the 1.13 deprecation warning. - migrations/env.py: translates async URLs (+aiosqlite, +asyncpg) to sync counterparts at migrate-time so a single ARTIFACTSTORE_DATABASE_URL works for both runtime (async) and Alembic (sync). - migrations/script.py.mako template. - migrations/versions/20260516_0001_initial.py: metadata.create_all + bulk insert of retention class seeds. Make: - make migrate: alembic upgrade head (ensures var/ exists). - make migrate-fresh: drop local SQLite + re-run. Deps: psycopg[binary] added as optional `postgres` extra (PostgreSQL prod path; SQLite default for dev needs no extra). Tests: - tests/unit/test_db_schema.py: every expected table present; ADR-0001 and tiering columns present; seed has the five v1 classes; permanent-record has no default_duration; create_all + FK insert + Boolean default round-trip on in-memory SQLite. 
- tests/integration/test_migrations.py: alembic upgrade head against a tempfile SQLite produces all tables (+ alembic_version) and the seed rows. Gates: ruff clean, mypy --strict clean on 32 files, 38 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
12
src/artifactstore/db/__init__.py
Normal file
12
src/artifactstore/db/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""Database schema and engine factory.
|
||||
|
||||
The ``schema`` submodule owns the SQLAlchemy Core :class:`MetaData` and
|
||||
:class:`Table` definitions referenced by both migrations and runtime queries.
|
||||
``engine`` exposes the async engine factory. ``seed`` holds bootstrap data
|
||||
applied by the initial migration.
|
||||
"""
|
||||
|
||||
from artifactstore.db import schema, seed
|
||||
from artifactstore.db.engine import create_engine
|
||||
|
||||
# Public names of the package: the async engine factory plus the ``schema``
# and ``seed`` submodules imported above.
__all__ = ["create_engine", "schema", "seed"]
|
||||
12
src/artifactstore/db/engine.py
Normal file
12
src/artifactstore/db/engine.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""Async SQLAlchemy engine factory."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
|
||||
|
||||
from artifactstore.config import Settings
|
||||
|
||||
|
||||
def create_engine(settings: Settings) -> AsyncEngine:
    """Build the async SQLAlchemy engine used at runtime.

    Only ``settings.database_url`` is read. The engine is created with SQL
    echo disabled; no connection is opened by this call itself.
    """
    database_url = settings.database_url
    engine: AsyncEngine = create_async_engine(database_url, echo=False, future=True)
    return engine
|
||||
160
src/artifactstore/db/schema.py
Normal file
160
src/artifactstore/db/schema.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""Database schema (ADR-0002 + ARCHITECTURE-BLUEPRINT data model).
|
||||
|
||||
All tables are defined via SQLAlchemy Core so the same definitions drive
|
||||
migrations (Alembic) and runtime queries (registry orchestrator). Types use
|
||||
the portable SQLAlchemy 2.0 forms; PostgreSQL-specific variants are layered
|
||||
via :func:`with_variant` where the gain (e.g. ``JSONB`` over ``JSON``) is
|
||||
meaningful.
|
||||
|
||||
The ``events`` table is the source of truth (ADR-0002). The other tables
|
||||
are materialised views rebuildable from the event log.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import (
    JSON,
    BigInteger,
    Boolean,
    Column,
    DateTime,
    ForeignKey,
    Index,
    Integer,
    LargeBinary,
    MetaData,
    String,
    Table,
    UniqueConstraint,
    func,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.sql import false
from sqlalchemy.types import Uuid
|
||||
|
||||
# Single MetaData registry; every Table in this module is attached to it.
metadata = MetaData()
|
||||
|
||||
# Portable JSON column type: generic JSON by default, JSONB when the dialect
# is PostgreSQL.
_JSON_TYPE = JSON().with_variant(JSONB(), "postgresql")
|
||||
|
||||
|
||||
# Event log. Per the module docstring (ADR-0002) this table is the source of
# truth; the other tables in this module are rebuildable from it.
events = Table(
    "events",
    metadata,
    # BIGINT everywhere except SQLite, where the column must be declared plain
    # INTEGER: SQLite only auto-assigns values to an ``INTEGER PRIMARY KEY``
    # (the rowid alias). A ``BIGINT PRIMARY KEY`` gets no generated value there,
    # so an insert that omits ``sequence`` would fail the NOT NULL constraint.
    # SQLite's INTEGER storage is 64-bit signed, so no range is lost.
    Column(
        "sequence",
        BigInteger().with_variant(Integer(), "sqlite"),
        primary_key=True,
        autoincrement=True,
    ),
    # Filled in by the database when the row is inserted.
    Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
    Column("event_type", String, nullable=False),
    Column("subject_kind", String, nullable=False),
    # Nullable: an event is allowed to carry no subject id.
    Column("subject_id", Uuid, nullable=True),
    Column("actor", String, nullable=False),
    # Raw bytes; the commit notes describe the payload as CBOR-encoded.
    Column("payload", LargeBinary, nullable=False),
    # Digest stored as raw bytes; the algorithm is not fixed by this schema.
    Column("payload_digest", LargeBinary, nullable=False),
    # Lookup of all events for one subject.
    Index("ix_events_subject", "subject_kind", "subject_id"),
    # Ordered scan of one event type by sequence.
    Index("ix_events_type_sequence", "event_type", "sequence"),
)
|
||||
|
||||
|
||||
# Retention classes, keyed by a string identifier. Described in the commit
# notes as a seed table populated by the initial migration.
retention_classes = Table(
    "retention_classes",
    metadata,
    Column("class_id", String, primary_key=True),
    # Nullable: a class may have no default duration.
    Column("default_duration_seconds", BigInteger, nullable=True),
    Column("deletion_strategy", String, nullable=False),
)
|
||||
|
||||
|
||||
# JSON schemas that packages may reference via ``metadata_schema_id``.
metadata_schemas = Table(
    "metadata_schemas",
    metadata,
    Column("id", Uuid, primary_key=True),
    # Unique slug identifying a schema.
    Column("slug", String, unique=True, nullable=False),
    Column("json_schema", _JSON_TYPE, nullable=False),
    Column("created_at", DateTime(timezone=True), server_default=func.now(), nullable=False),
)
|
||||
|
||||
|
||||
# Artifact packages. Per the module docstring this is a materialised view
# rebuildable from the event log.
artifact_packages = Table(
    "artifact_packages",
    metadata,
    Column("id", Uuid, primary_key=True),
    Column("name", String, nullable=False),
    Column("producer", String, nullable=False),
    Column("subject", String, nullable=False),
    # Every package must point at an existing retention class.
    Column(
        "retention_class",
        String,
        ForeignKey("retention_classes.class_id"),
        nullable=False,
    ),
    # Optional link to the JSON schema row for ``metadata``.
    Column("metadata_schema_id", Uuid, ForeignKey("metadata_schemas.id"), nullable=True),
    Column("metadata", _JSON_TYPE, nullable=False),
    Column("status", String, nullable=False),
    Column("manifest_digest", LargeBinary, nullable=True),
    Column("created_at", DateTime(timezone=True), server_default=func.now(), nullable=False),
    Column("finalized_at", DateTime(timezone=True), nullable=True),
    Column("expires_at", DateTime(timezone=True), nullable=True),
    # NOTE(review): presumably the ``events.sequence`` last applied to this
    # row; no foreign key enforces that, so confirm against the projector.
    Column("last_event_sequence", BigInteger, nullable=False),
)
|
||||
|
||||
|
||||
# Files belonging to a package; a relative path is unique within its package.
artifact_files = Table(
    "artifact_files",
    metadata,
    Column("id", Uuid, primary_key=True),
    Column("package_id", Uuid, ForeignKey("artifact_packages.id"), nullable=False),
    Column("relative_path", String, nullable=False),
    Column("media_type", String, nullable=False),
    Column("size_bytes", BigInteger, nullable=False),
    # Digest columns (listed in the commit notes as the ADR-0001 columns);
    # both digests are stored as raw bytes.
    Column("digest_algorithm", String, nullable=False),
    Column("digest_primary", LargeBinary, nullable=False),
    Column("digest_sha256", LargeBinary, nullable=False),
    Column("created_at", DateTime(timezone=True), server_default=func.now(), nullable=False),
    UniqueConstraint("package_id", "relative_path", name="uq_artifact_files_pkg_path"),
)
|
||||
|
||||
|
||||
# Where the bytes of a file are stored. Nothing here limits a file to a single
# location row.
storage_locations = Table(
    "storage_locations",
    metadata,
    Column("id", Uuid, primary_key=True),
    Column("artifact_file_id", Uuid, ForeignKey("artifact_files.id"), nullable=False),
    Column("backend_id", String, nullable=False),
    Column("content_address", String, nullable=False),
    Column("object_key", String, nullable=False),
    Column("storage_class", String, nullable=True),
    # Tiering columns: the database defaults the tier to 'hot'; restore_status
    # is optional.
    Column("retrieval_tier", String, server_default="hot", nullable=False),
    Column("restore_status", String, nullable=True),
    Column("status", String, nullable=False),
    Column("created_at", DateTime(timezone=True), server_default=func.now(), nullable=False),
    Column("last_verified_at", DateTime(timezone=True), nullable=True),
    # Lookup of locations by content address.
    Index("ix_storage_locations_content_address", "content_address"),
)
|
||||
|
||||
|
||||
# Retention status per package: the primary key is also the foreign key to
# ``artifact_packages``, so there is at most one row per package.
retention_state = Table(
    "retention_state",
    metadata,
    Column("package_id", Uuid, ForeignKey("artifact_packages.id"), primary_key=True),
    Column("current_expires_at", DateTime(timezone=True), nullable=True),
    # Plain string; not constrained by a foreign key to ``retention_classes``.
    Column("effective_class", String, nullable=False),
    Column("active_hold_id", Uuid, nullable=True),
    # The database defaults this flag to false.
    Column("eligible_for_deletion", Boolean, server_default=false(), nullable=False),
)
|
||||
52
src/artifactstore/db/seed.py
Normal file
52
src/artifactstore/db/seed.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Bootstrap seed data applied by the initial migration.
|
||||
|
||||
The :data:`RETENTION_CLASS_SEEDS` entries match the five v1 retention classes
|
||||
listed in ``docs/ARCHITECTURE-BLUEPRINT.md``. Default durations are intended
|
||||
to be overridable by an operator configuration file (WP-0003); the seed
|
||||
values only ensure the registry has sensible defaults on a fresh DB.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TypedDict
|
||||
|
||||
|
||||
class RetentionClassSeed(TypedDict):
    """Shape of one seed row for the ``retention_classes`` table."""

    # String identifier of the retention class.
    class_id: str
    # Default duration in seconds, or ``None`` when the class has none.
    default_duration_seconds: int | None
    # Name of the deletion strategy applied to the class.
    deletion_strategy: str
|
||||
|
||||
|
||||
# Durations in seconds. A year is a fixed 365 days; leap days are ignored.
_ONE_DAY = 24 * 60 * 60
_NINETY_DAYS = _ONE_DAY * 90
_ONE_YEAR = _ONE_DAY * 365
_SEVEN_YEARS = _ONE_YEAR * 7

# One row per retention class, ordered from shortest to longest default
# duration. The last class has no default duration (``None``).
RETENTION_CLASS_SEEDS: tuple[RetentionClassSeed, ...] = (
    {
        "class_id": "transient",
        "default_duration_seconds": _ONE_DAY,
        "deletion_strategy": "mark_eligible",
    },
    {
        "class_id": "raw-evidence",
        "default_duration_seconds": _NINETY_DAYS,
        "deletion_strategy": "mark_eligible",
    },
    {
        "class_id": "summary-evidence",
        "default_duration_seconds": _ONE_YEAR,
        "deletion_strategy": "mark_eligible",
    },
    {
        "class_id": "release-evidence",
        "default_duration_seconds": _SEVEN_YEARS,
        "deletion_strategy": "mark_eligible",
    },
    {
        "class_id": "permanent-record",
        "default_duration_seconds": None,
        "deletion_strategy": "mark_eligible",
    },
)
|
||||
Reference in New Issue
Block a user