"""add token event provenance fields Revision ID: v9q0r1s2t3u4 Revises: u8p9q0r1s2t3 Create Date: 2026-05-23 """ from alembic import op import sqlalchemy as sa from sqlalchemy.dialects import postgresql revision = "v9q0r1s2t3u4" down_revision = "u8p9q0r1s2t3" branch_labels = None depends_on = None def upgrade() -> None: op.add_column( "token_events", sa.Column("measurement_kind", sa.Text(), nullable=False, server_default="estimated"), ) op.add_column( "token_events", sa.Column("source_provider", sa.Text(), nullable=False, server_default="manual"), ) op.add_column("token_events", sa.Column("source_id", sa.Text(), nullable=True)) op.add_column("token_events", sa.Column("source_path", sa.Text(), nullable=True)) op.add_column( "token_events", sa.Column("source_created_at", sa.TIMESTAMP(timezone=True), nullable=True), ) op.add_column( "token_events", sa.Column("ingested_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")), ) op.add_column("token_events", sa.Column("parser_version", sa.Text(), nullable=True)) op.add_column( "token_events", sa.Column("confidence", sa.Float(), nullable=False, server_default="0.35"), ) op.add_column( "token_events", sa.Column("cached_input_tokens", sa.Integer(), nullable=False, server_default="0"), ) op.add_column( "token_events", sa.Column("reasoning_output_tokens", sa.Integer(), nullable=False, server_default="0"), ) op.add_column("token_events", sa.Column("raw_total_tokens", sa.Integer(), nullable=True)) op.add_column("token_events", sa.Column("cost_estimated_usd", sa.Float(), nullable=True)) op.add_column( "token_events", sa.Column( "raw_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=False, server_default=sa.text("'{}'::jsonb"), ), ) op.execute( """ UPDATE token_events SET measurement_kind = CASE WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 'superseded' WHEN note = 'workplan' THEN 'allocated' WHEN note = 'heuristic' THEN 'estimated' WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 'measured' ELSE measurement_kind END, source_provider = CASE WHEN note = 'heuristic' THEN 'task_fallback' WHEN note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%' THEN 'codex_session' WHEN note = 'measured' AND agent ILIKE '%claude%' THEN 'claude_transcript' ELSE source_provider END, source_id = CASE WHEN source_id IS NULL AND (note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%') THEN ref_id ELSE source_id END, raw_total_tokens = CASE WHEN raw_total_tokens IS NULL THEN tokens_in + tokens_out ELSE raw_total_tokens END, confidence = CASE WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 0.0 WHEN note = 'heuristic' THEN 0.35 WHEN note = 'workplan' THEN 0.70 WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 1.0 ELSE confidence END """ ) op.create_index("ix_token_events_measurement_kind", "token_events", ["measurement_kind"]) op.create_index("ix_token_events_source_provider", "token_events", ["source_provider"]) op.create_index("ix_token_events_source_id", "token_events", ["source_id"]) op.create_index("ix_token_events_source_created_at", "token_events", ["source_created_at"]) op.create_index("ix_token_events_ingested_at", "token_events", ["ingested_at"]) op.create_unique_constraint( "uq_token_events_source_identity", "token_events", ["measurement_kind", "source_provider", "source_id"], ) def downgrade() -> None: op.drop_constraint("uq_token_events_source_identity", "token_events", type_="unique") op.drop_index("ix_token_events_ingested_at", table_name="token_events") op.drop_index("ix_token_events_source_created_at", table_name="token_events") op.drop_index("ix_token_events_source_id", table_name="token_events") op.drop_index("ix_token_events_source_provider", table_name="token_events") op.drop_index("ix_token_events_measurement_kind", table_name="token_events") op.drop_column("token_events", "raw_metadata") op.drop_column("token_events", "cost_estimated_usd") op.drop_column("token_events", "raw_total_tokens") op.drop_column("token_events", "reasoning_output_tokens") op.drop_column("token_events", "cached_input_tokens") op.drop_column("token_events", "confidence") op.drop_column("token_events", "parser_version") op.drop_column("token_events", "ingested_at") op.drop_column("token_events", "source_created_at") op.drop_column("token_events", "source_path") op.drop_column("token_events", "source_id") op.drop_column("token_events", "source_provider") op.drop_column("token_events", "measurement_kind")