generated from coulomb/repo-seed
129 lines
5.1 KiB
Python
129 lines
5.1 KiB
Python
"""add token event provenance fields
|
|
|
|
Revision ID: v9q0r1s2t3u4
|
|
Revises: u8p9q0r1s2t3
|
|
Create Date: 2026-05-23
|
|
"""
|
|
from alembic import op
|
|
import sqlalchemy as sa
|
|
from sqlalchemy.dialects import postgresql
|
|
|
|
# Revision identifiers, used by Alembic.
# `revision` and `down_revision` repeat the "Revision ID" and "Revises"
# values from the module docstring; keep the two in sync when editing.
revision = "v9q0r1s2t3u4"
down_revision = "u8p9q0r1s2t3"
# No branch labels and no cross-branch dependencies are declared here.
branch_labels = None
depends_on = None
|
|
|
|
|
|
def upgrade() -> None:
    """Add provenance columns to ``token_events``, backfill them, and index them.

    The migration runs in three phases:

    1. Add thirteen columns describing where a token event came from and how
       trustworthy it is. Every NOT NULL column carries a server default.
    2. Run a single ``UPDATE`` that derives ``measurement_kind``,
       ``source_provider``, ``source_id``, ``raw_total_tokens`` and
       ``confidence`` from the pre-existing ``note``, ``ref_id``, ``agent``,
       ``tokens_in`` and ``tokens_out`` columns. Rows matching no rule keep
       the value they already have (the server default or NULL).
    3. Create five single-column indexes and a unique constraint over
       ``(measurement_kind, source_provider, source_id)``.
    """
    table = "token_events"

    # Column order matters: columns are appended to the table in this order.
    provenance_columns = (
        sa.Column("measurement_kind", sa.Text(), nullable=False, server_default="estimated"),
        sa.Column("source_provider", sa.Text(), nullable=False, server_default="manual"),
        sa.Column("source_id", sa.Text(), nullable=True),
        sa.Column("source_path", sa.Text(), nullable=True),
        sa.Column("source_created_at", sa.TIMESTAMP(timezone=True), nullable=True),
        sa.Column(
            "ingested_at",
            sa.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column("parser_version", sa.Text(), nullable=True),
        sa.Column("confidence", sa.Float(), nullable=False, server_default="0.35"),
        sa.Column("cached_input_tokens", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("reasoning_output_tokens", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("raw_total_tokens", sa.Integer(), nullable=True),
        sa.Column("cost_estimated_usd", sa.Float(), nullable=True),
        sa.Column(
            "raw_metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            server_default=sa.text("'{}'::jsonb"),
        ),
    )
    for column in provenance_columns:
        op.add_column(table, column)

    # Backfill existing rows from the legacy free-text columns. The UPDATE has
    # no WHERE clause, so every row is visited; each CASE falls through to the
    # column's current value when no rule matches.
    backfill_sql = """
        UPDATE token_events
        SET
            measurement_kind = CASE
                WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 'superseded'
                WHEN note = 'workplan' THEN 'allocated'
                WHEN note = 'heuristic' THEN 'estimated'
                WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 'measured'
                ELSE measurement_kind
            END,
            source_provider = CASE
                WHEN note = 'heuristic' THEN 'task_fallback'
                WHEN note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%' THEN 'codex_session'
                WHEN note = 'measured' AND agent ILIKE '%claude%' THEN 'claude_transcript'
                ELSE source_provider
            END,
            source_id = CASE
                WHEN source_id IS NULL AND (note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%')
                    THEN ref_id
                ELSE source_id
            END,
            raw_total_tokens = CASE
                WHEN raw_total_tokens IS NULL THEN tokens_in + tokens_out
                ELSE raw_total_tokens
            END,
            confidence = CASE
                WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 0.0
                WHEN note = 'heuristic' THEN 0.35
                WHEN note = 'workplan' THEN 0.70
                WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 1.0
                ELSE confidence
            END
        """
    op.execute(backfill_sql)

    # Indexes and the unique constraint are created after the backfill, so
    # they are built over the final values.
    indexed_columns = (
        "measurement_kind",
        "source_provider",
        "source_id",
        "source_created_at",
        "ingested_at",
    )
    for column_name in indexed_columns:
        op.create_index(f"ix_token_events_{column_name}", table, [column_name])

    op.create_unique_constraint(
        "uq_token_events_source_identity",
        table,
        ["measurement_kind", "source_provider", "source_id"],
    )
|
|
|
|
|
|
def downgrade() -> None:
    """Remove everything ``upgrade()`` added to ``token_events``.

    Drops the unique constraint first, then the five indexes, then the
    thirteen provenance columns, each group in the reverse of the order in
    which it was created. Values stored in the dropped columns are discarded.
    """
    table = "token_events"

    op.drop_constraint("uq_token_events_source_identity", table, type_="unique")

    # Reverse of the index creation order in upgrade().
    indexed_columns = (
        "ingested_at",
        "source_created_at",
        "source_id",
        "source_provider",
        "measurement_kind",
    )
    for column_name in indexed_columns:
        op.drop_index(f"ix_token_events_{column_name}", table_name=table)

    # Reverse of the column creation order in upgrade().
    provenance_columns = (
        "raw_metadata",
        "cost_estimated_usd",
        "raw_total_tokens",
        "reasoning_output_tokens",
        "cached_input_tokens",
        "confidence",
        "parser_version",
        "ingested_at",
        "source_created_at",
        "source_path",
        "source_id",
        "source_provider",
        "measurement_kind",
    )
    for column_name in provenance_columns:
        op.drop_column(table, column_name)
|