"""SQLite asset registry repository.""" from __future__ import annotations import json import sqlite3 from pathlib import Path from typing import Any from kontextual_engine.core import ( Actor, AssetRepresentation, AssetVersion, AuditEvent, ContextEntity, CoreRelationship, DerivedArtifactLineage, IdempotencyRecord, IngestionJob, IngestionJobStatus, KnowledgeAsset, LifecycleState, MetadataRecord, MetadataSchema, MetadataSchemaAssignment, RepresentationKind, RelationshipTargetKind, RetrievalFeedbackRecord, Sensitivity, TransformationRun, TransformationRunStatus, WorkflowRun, WorkflowRunStatus, WorkflowTemplate, ) from kontextual_engine.errors import NotFoundError, ValidationError class SQLiteAssetRegistryRepository: def __init__(self, path: str | Path) -> None: self.path = Path(path) self.path.parent.mkdir(parents=True, exist_ok=True) self._initialize() def save_actor(self, actor: Actor) -> Actor: with self._connect() as conn: conn.execute( """ insert into actors (id, actor_type, payload) values (?, ?, ?) on conflict(id) do update set actor_type=excluded.actor_type, payload=excluded.payload """, (actor.id, actor.actor_type.value, _json(actor.to_dict())), ) return actor def get_actor(self, actor_id: str) -> Actor: row = self._one("select payload from actors where id = ?", (actor_id,)) if row is None: raise NotFoundError("Actor not found", details={"actor_id": actor_id}) return Actor.from_dict(_loads(row["payload"])) def save_asset(self, asset: KnowledgeAsset) -> KnowledgeAsset: with self._connect() as conn: conn.execute( """ insert into assets (id, title, asset_type, lifecycle, payload) values (?, ?, ?, ?, ?) on conflict(id) do update set title=excluded.title, asset_type=excluded.asset_type, lifecycle=excluded.lifecycle, payload=excluded.payload """, ( asset.id, asset.title, asset.classification.asset_type, asset.lifecycle.value, _json(asset.to_dict()), ), ) return asset def get_asset(self, asset_id: str) -> KnowledgeAsset: row = self._one("select payload from assets where id = ?", (asset_id,)) if row is None: raise NotFoundError("Asset not found", details={"asset_id": asset_id}) return KnowledgeAsset.from_dict(_loads(row["payload"])) def list_assets( self, *, lifecycle: LifecycleState | None = None, asset_type: str | None = None, sensitivity: Sensitivity | str | None = None, owner: str | None = None, topic: str | None = None, review_state: str | None = None, metadata_filters: dict[str, Any] | None = None, confirmed_metadata_only: bool = False, ) -> list[KnowledgeAsset]: clauses = [] params: list[Any] = [] if lifecycle is not None: clauses.append("lifecycle = ?") params.append(lifecycle.value) if asset_type is not None: clauses.append("asset_type = ?") params.append(asset_type) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all(f"select payload from assets{where} order by title, id", tuple(params)) assets = [KnowledgeAsset.from_dict(_loads(row["payload"])) for row in rows] if sensitivity is not None: sensitivity = Sensitivity(sensitivity) assets = [asset for asset in assets if asset.classification.sensitivity == sensitivity] if owner is not None: assets = [asset for asset in assets if asset.classification.owner == owner] if topic is not None: assets = [asset for asset in assets if topic in asset.classification.topics] if review_state is not None: assets = [asset for asset in assets if asset.classification.review_state == review_state] if metadata_filters: records_by_asset = self._metadata_records_for_assets([asset.id for asset in assets]) assets = [ asset for asset in assets if _metadata_matches( records_by_asset.get(asset.id, []), metadata_filters, confirmed_metadata_only=confirmed_metadata_only, ) ] return assets def save_representation(self, representation: AssetRepresentation) -> AssetRepresentation: try: with self._connect() as conn: conn.execute( """ insert into representations (id, asset_id, kind, digest, payload) values (?, ?, ?, ?, ?) on conflict(id) do update set asset_id=excluded.asset_id, kind=excluded.kind, digest=excluded.digest, payload=excluded.payload """, ( representation.representation_id, representation.asset_id, representation.kind.value, representation.digest, _json(representation.to_dict()), ), ) except sqlite3.IntegrityError as exc: raise ValidationError( "Representation references an unknown asset", details={ "asset_id": representation.asset_id, "representation_id": representation.representation_id, }, ) from exc return representation def get_representation(self, representation_id: str) -> AssetRepresentation: row = self._one("select payload from representations where id = ?", (representation_id,)) if row is None: raise NotFoundError( "Representation not found", details={"representation_id": representation_id}, ) return AssetRepresentation.from_dict(_loads(row["payload"])) def list_representations( self, *, asset_id: str | None = None, kind: RepresentationKind | None = None, ) -> list[AssetRepresentation]: clauses = [] params: list[Any] = [] if asset_id is not None: clauses.append("asset_id = ?") params.append(asset_id) if kind is not None: clauses.append("kind = ?") params.append(kind.value) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all( f"select payload from representations{where} order by asset_id, kind, id", tuple(params), ) return [AssetRepresentation.from_dict(_loads(row["payload"])) for row in rows] def save_metadata_record(self, asset_id: str, record: MetadataRecord) -> MetadataRecord: try: with self._connect() as conn: conn.execute( """ insert into metadata_records (id, asset_id, key, payload) values (?, ?, ?, ?) on conflict(id) do update set asset_id=excluded.asset_id, key=excluded.key, payload=excluded.payload """, (record.record_id, asset_id, record.key, _json(record.to_dict())), ) except sqlite3.IntegrityError as exc: raise ValidationError( "Metadata record references an unknown asset", details={"asset_id": asset_id, "record_id": record.record_id}, ) from exc return record def list_metadata_records(self, asset_id: str) -> list[MetadataRecord]: rows = self._all( "select payload from metadata_records where asset_id = ? order by key, id", (asset_id,), ) if not rows: self.get_asset(asset_id) return [MetadataRecord.from_dict(_loads(row["payload"])) for row in rows] def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema: with self._connect() as conn: conn.execute( """ insert into metadata_schemas (id, name, version, payload) values (?, ?, ?, ?) on conflict(id) do update set name=excluded.name, version=excluded.version, payload=excluded.payload """, (schema.schema_id, schema.name, schema.version, _json(schema.to_dict())), ) return schema def get_metadata_schema(self, schema_id: str) -> MetadataSchema: row = self._one("select payload from metadata_schemas where id = ?", (schema_id,)) if row is None: raise NotFoundError("Metadata schema not found", details={"schema_id": schema_id}) return MetadataSchema.from_dict(_loads(row["payload"])) def list_metadata_schemas(self) -> list[MetadataSchema]: rows = self._all("select payload from metadata_schemas order by name, id", ()) return [MetadataSchema.from_dict(_loads(row["payload"])) for row in rows] def save_metadata_schema_assignment( self, assignment: MetadataSchemaAssignment, ) -> MetadataSchemaAssignment: self.get_metadata_schema(assignment.schema_id) with self._connect() as conn: conn.execute( """ insert into metadata_schema_assignments (id, schema_id, priority, payload) values (?, ?, ?, ?) on conflict(id) do update set schema_id=excluded.schema_id, priority=excluded.priority, payload=excluded.payload """, ( assignment.assignment_id, assignment.schema_id, assignment.priority, _json(assignment.to_dict()), ), ) return assignment def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment: row = self._one( "select payload from metadata_schema_assignments where id = ?", (assignment_id,), ) if row is None: raise NotFoundError( "Metadata schema assignment not found", details={"assignment_id": assignment_id}, ) return MetadataSchemaAssignment.from_dict(_loads(row["payload"])) def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]: rows = self._all( "select payload from metadata_schema_assignments order by priority, schema_id, id", (), ) return [MetadataSchemaAssignment.from_dict(_loads(row["payload"])) for row in rows] def save_context_entity(self, entity: ContextEntity) -> ContextEntity: with self._connect() as conn: conn.execute( """ insert into context_entities (id, entity_type, name, payload) values (?, ?, ?, ?) on conflict(id) do update set entity_type=excluded.entity_type, name=excluded.name, payload=excluded.payload """, (entity.entity_id, entity.entity_type.value, entity.name, _json(entity.to_dict())), ) return entity def get_context_entity(self, entity_id: str) -> ContextEntity: row = self._one("select payload from context_entities where id = ?", (entity_id,)) if row is None: raise NotFoundError("Context entity not found", details={"entity_id": entity_id}) return ContextEntity.from_dict(_loads(row["payload"])) def list_context_entities(self) -> list[ContextEntity]: rows = self._all("select payload from context_entities order by entity_type, name, id", ()) return [ContextEntity.from_dict(_loads(row["payload"])) for row in rows] def save_relationship(self, relationship: CoreRelationship) -> CoreRelationship: self.get_asset(relationship.source_id) if relationship.target_kind == RelationshipTargetKind.ASSET: self.get_asset(relationship.target_id) else: self.get_context_entity(relationship.target_id) with self._connect() as conn: conn.execute( """ insert into core_relationships (id, source_id, target_id, target_kind, predicate, payload) values (?, ?, ?, ?, ?, ?) on conflict(id) do update set source_id=excluded.source_id, target_id=excluded.target_id, target_kind=excluded.target_kind, predicate=excluded.predicate, payload=excluded.payload """, ( relationship.relationship_id, relationship.source_id, relationship.target_id, relationship.target_kind.value, relationship.predicate, _json(relationship.to_dict()), ), ) return relationship def get_relationship(self, relationship_id: str) -> CoreRelationship: row = self._one("select payload from core_relationships where id = ?", (relationship_id,)) if row is None: raise NotFoundError( "Relationship not found", details={"relationship_id": relationship_id}, ) return CoreRelationship.from_dict(_loads(row["payload"])) def list_relationships( self, *, source_id: str | None = None, target_id: str | None = None, ) -> list[CoreRelationship]: clauses = [] params: list[Any] = [] if source_id is not None: clauses.append("source_id = ?") params.append(source_id) if target_id is not None: clauses.append("target_id = ?") params.append(target_id) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all( f"select payload from core_relationships{where} order by source_id, target_id, predicate, id", tuple(params), ) return [CoreRelationship.from_dict(_loads(row["payload"])) for row in rows] def save_version(self, version: AssetVersion) -> AssetVersion: try: with self._connect() as conn: conn.execute( """ insert into asset_versions (id, asset_id, sequence, change_type, payload) values (?, ?, ?, ?, ?) """, ( version.version_id, version.asset_id, version.sequence, version.change_type.value, _json(version.to_dict()), ), ) except sqlite3.IntegrityError as exc: if _is_foreign_key_error(exc): raise ValidationError( "Version references an unknown asset", details={"asset_id": version.asset_id, "version_id": version.version_id}, ) from exc raise ValidationError( "Version sequence already exists for asset", details={"asset_id": version.asset_id, "sequence": version.sequence}, ) from exc return version def list_versions(self, asset_id: str) -> list[AssetVersion]: rows = self._all( "select payload from asset_versions where asset_id = ? order by sequence", (asset_id,), ) if not rows: self.get_asset(asset_id) return [AssetVersion.from_dict(_loads(row["payload"])) for row in rows] def save_audit_event(self, event: AuditEvent) -> AuditEvent: try: with self._connect() as conn: conn.execute( """ insert into audit_events (id, target, actor_id, correlation_id, outcome, occurred_at, payload) values (?, ?, ?, ?, ?, ?, ?) on conflict(id) do update set target=excluded.target, actor_id=excluded.actor_id, correlation_id=excluded.correlation_id, outcome=excluded.outcome, occurred_at=excluded.occurred_at, payload=excluded.payload """, ( event.event_id, event.target, event.actor_id, event.correlation_id, event.outcome.value, event.occurred_at, _json(event.to_dict()), ), ) except sqlite3.IntegrityError as exc: if _is_foreign_key_error(exc): raise ValidationError( "Audit event references an unknown actor", details={"actor_id": event.actor_id, "event_id": event.event_id}, ) from exc raise return event def get_audit_event(self, event_id: str) -> AuditEvent: row = self._one("select payload from audit_events where id = ?", (event_id,)) if row is None: raise NotFoundError("Audit event not found", details={"event_id": event_id}) return AuditEvent.from_dict(_loads(row["payload"])) def list_audit_events( self, *, target: str | None = None, correlation_id: str | None = None, ) -> list[AuditEvent]: clauses = [] params: list[Any] = [] if target is not None: clauses.append("target = ?") params.append(target) if correlation_id is not None: clauses.append("correlation_id = ?") params.append(correlation_id) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all(f"select payload from audit_events{where} order by occurred_at, rowid", tuple(params)) return [AuditEvent.from_dict(_loads(row["payload"])) for row in rows] def save_retrieval_feedback(self, record: RetrievalFeedbackRecord) -> RetrievalFeedbackRecord: try: with self._connect() as conn: conn.execute( """ insert into retrieval_feedback (id, label, actor_id, correlation_id, created_at, payload) values (?, ?, ?, ?, ?, ?) on conflict(id) do update set label=excluded.label, actor_id=excluded.actor_id, correlation_id=excluded.correlation_id, created_at=excluded.created_at, payload=excluded.payload """, ( record.feedback_id, record.label.value, record.actor_id, record.correlation_id, record.created_at, _json(record.to_dict()), ), ) except sqlite3.IntegrityError as exc: if _is_foreign_key_error(exc): raise ValidationError( "Retrieval feedback references an unknown actor", details={"actor_id": record.actor_id, "feedback_id": record.feedback_id}, ) from exc raise return record def list_retrieval_feedback( self, *, correlation_id: str | None = None, label: str | None = None, ) -> list[RetrievalFeedbackRecord]: clauses = [] params: list[Any] = [] if correlation_id is not None: clauses.append("correlation_id = ?") params.append(correlation_id) if label is not None: clauses.append("label = ?") params.append(label) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all(f"select payload from retrieval_feedback{where} order by created_at, id", tuple(params)) return [RetrievalFeedbackRecord.from_dict(_loads(row["payload"])) for row in rows] def save_idempotency_record(self, record: IdempotencyRecord) -> IdempotencyRecord: with self._connect() as conn: conn.execute( """ insert into idempotency_records (key, operation, request_hash, status, payload) values (?, ?, ?, ?, ?) on conflict(key) do update set operation=excluded.operation, request_hash=excluded.request_hash, status=excluded.status, payload=excluded.payload """, ( record.key, record.operation, record.request_hash, record.status.value, _json(record.to_dict()), ), ) return record def get_idempotency_record(self, key: str) -> IdempotencyRecord | None: row = self._one("select payload from idempotency_records where key = ?", (key,)) if row is None: return None return IdempotencyRecord.from_dict(_loads(row["payload"])) def save_ingestion_job(self, job: IngestionJob) -> IngestionJob: try: with self._connect() as conn: conn.execute( """ insert into ingestion_jobs (id, status, actor_id, correlation_id, created_at, updated_at, payload) values (?, ?, ?, ?, ?, ?, ?) on conflict(id) do update set status=excluded.status, actor_id=excluded.actor_id, correlation_id=excluded.correlation_id, updated_at=excluded.updated_at, payload=excluded.payload """, ( job.job_id, job.status.value, job.actor_id, job.correlation_id, job.created_at, job.updated_at, _json(job.to_dict()), ), ) except sqlite3.IntegrityError as exc: if _is_foreign_key_error(exc): raise ValidationError( "Ingestion job references an unknown actor", details={"actor_id": job.actor_id, "job_id": job.job_id}, ) from exc raise return job def get_ingestion_job(self, job_id: str) -> IngestionJob: row = self._one("select payload from ingestion_jobs where id = ?", (job_id,)) if row is None: raise NotFoundError("Ingestion job not found", details={"job_id": job_id}) return IngestionJob.from_dict(_loads(row["payload"])) def list_ingestion_jobs( self, *, status: IngestionJobStatus | None = None, ) -> list[IngestionJob]: if status is None: rows = self._all("select payload from ingestion_jobs order by created_at, id", ()) else: rows = self._all( "select payload from ingestion_jobs where status = ? order by created_at, id", (status.value,), ) return [IngestionJob.from_dict(_loads(row["payload"])) for row in rows] def save_transformation_run(self, run: TransformationRun) -> TransformationRun: try: with self._connect() as conn: conn.execute( """ insert into transformation_runs (id, operation_id, status, actor_id, correlation_id, queued_at, updated_at, payload) values (?, ?, ?, ?, ?, ?, ?, ?) on conflict(id) do update set operation_id=excluded.operation_id, status=excluded.status, actor_id=excluded.actor_id, correlation_id=excluded.correlation_id, queued_at=excluded.queued_at, updated_at=excluded.updated_at, payload=excluded.payload """, ( run.run_id, run.operation_id, run.status.value, run.actor_id, run.correlation_id, run.queued_at, run.updated_at, _json(run.to_dict()), ), ) except sqlite3.IntegrityError as exc: if _is_foreign_key_error(exc): raise ValidationError( "Transformation run references an unknown actor", details={"actor_id": run.actor_id, "run_id": run.run_id}, ) from exc raise return run def get_transformation_run(self, run_id: str) -> TransformationRun: row = self._one("select payload from transformation_runs where id = ?", (run_id,)) if row is None: raise NotFoundError("Transformation run not found", details={"run_id": run_id}) return TransformationRun.from_dict(_loads(row["payload"])) def list_transformation_runs( self, *, status: TransformationRunStatus | None = None, operation_id: str | None = None, ) -> list[TransformationRun]: clauses = [] params: list[Any] = [] if status is not None: clauses.append("status = ?") params.append(status.value) if operation_id is not None: clauses.append("operation_id = ?") params.append(operation_id) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all(f"select payload from transformation_runs{where} order by queued_at, id", tuple(params)) return [TransformationRun.from_dict(_loads(row["payload"])) for row in rows] def save_derived_lineage(self, lineage: DerivedArtifactLineage) -> DerivedArtifactLineage: try: with self._connect() as conn: conn.execute( """ insert into derived_lineage (id, output_asset_id, transformation_run_id, payload) values (?, ?, ?, ?) on conflict(id) do update set output_asset_id=excluded.output_asset_id, transformation_run_id=excluded.transformation_run_id, payload=excluded.payload """, ( lineage.lineage_id, lineage.output_asset_id, lineage.transformation_run_id, _json(lineage.to_dict()), ), ) except sqlite3.IntegrityError as exc: if _is_foreign_key_error(exc): raise ValidationError( "Derived lineage references an unknown output asset or transformation run", details={ "output_asset_id": lineage.output_asset_id, "transformation_run_id": lineage.transformation_run_id, "lineage_id": lineage.lineage_id, }, ) from exc raise return lineage def get_derived_lineage(self, lineage_id: str) -> DerivedArtifactLineage: row = self._one("select payload from derived_lineage where id = ?", (lineage_id,)) if row is None: raise NotFoundError("Derived lineage not found", details={"lineage_id": lineage_id}) return DerivedArtifactLineage.from_dict(_loads(row["payload"])) def list_derived_lineage( self, *, output_asset_id: str | None = None, source_asset_id: str | None = None, transformation_run_id: str | None = None, ) -> list[DerivedArtifactLineage]: clauses = [] params: list[Any] = [] if output_asset_id is not None: clauses.append("output_asset_id = ?") params.append(output_asset_id) if transformation_run_id is not None: clauses.append("transformation_run_id = ?") params.append(transformation_run_id) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all(f"select payload from derived_lineage{where} order by transformation_run_id, id", tuple(params)) records = [DerivedArtifactLineage.from_dict(_loads(row["payload"])) for row in rows] if source_asset_id is not None: records = [record for record in records if source_asset_id in record.source_asset_ids] return records def save_workflow_template(self, template: WorkflowTemplate) -> WorkflowTemplate: with self._connect() as conn: conn.execute( """ insert into workflow_templates (id, version, name, updated_at, payload) values (?, ?, ?, ?, ?) on conflict(id, version) do update set name=excluded.name, updated_at=excluded.updated_at, payload=excluded.payload """, ( template.template_id, template.version, template.name, template.updated_at, _json(template.to_dict()), ), ) return template def get_workflow_template( self, template_id: str, *, version: str | None = None, ) -> WorkflowTemplate: if version is None: row = self._one( """ select payload from workflow_templates where id = ? order by updated_at desc, version desc limit 1 """, (template_id,), ) else: row = self._one( "select payload from workflow_templates where id = ? and version = ?", (template_id, version), ) if row is None: details = {"template_id": template_id} if version is not None: details["version"] = version raise NotFoundError("Workflow template not found", details=details) return WorkflowTemplate.from_dict(_loads(row["payload"])) def list_workflow_templates( self, *, template_id: str | None = None, ) -> list[WorkflowTemplate]: if template_id is None: rows = self._all("select payload from workflow_templates order by name, version, id", ()) else: rows = self._all( "select payload from workflow_templates where id = ? order by name, version, id", (template_id,), ) return [WorkflowTemplate.from_dict(_loads(row["payload"])) for row in rows] def save_workflow_run(self, run: WorkflowRun) -> WorkflowRun: try: with self._connect() as conn: conn.execute( """ insert into workflow_runs (id, template_id, template_version, status, actor_id, correlation_id, queued_at, updated_at, payload) values (?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict(id) do update set template_id=excluded.template_id, template_version=excluded.template_version, status=excluded.status, actor_id=excluded.actor_id, correlation_id=excluded.correlation_id, queued_at=excluded.queued_at, updated_at=excluded.updated_at, payload=excluded.payload """, ( run.run_id, run.template_id, run.template_version, run.status.value, run.actor_id, run.correlation_id, run.queued_at, run.updated_at, _json(run.to_dict()), ), ) except sqlite3.IntegrityError as exc: if _is_foreign_key_error(exc): raise ValidationError( "Workflow run references an unknown actor or template", details={ "actor_id": run.actor_id, "template_id": run.template_id, "template_version": run.template_version, "run_id": run.run_id, }, ) from exc raise return run def get_workflow_run(self, run_id: str) -> WorkflowRun: row = self._one("select payload from workflow_runs where id = ?", (run_id,)) if row is None: raise NotFoundError("Workflow run not found", details={"run_id": run_id}) return WorkflowRun.from_dict(_loads(row["payload"])) def list_workflow_runs( self, *, status: WorkflowRunStatus | None = None, template_id: str | None = None, ) -> list[WorkflowRun]: clauses = [] params: list[Any] = [] if status is not None: clauses.append("status = ?") params.append(status.value) if template_id is not None: clauses.append("template_id = ?") params.append(template_id) where = f" where {' and '.join(clauses)}" if clauses else "" rows = self._all(f"select payload from workflow_runs{where} order by queued_at, id", tuple(params)) return [WorkflowRun.from_dict(_loads(row["payload"])) for row in rows] def _initialize(self) -> None: with self._connect() as conn: conn.executescript( """ create table if not exists actors ( id text primary key, actor_type text not null, payload text not null ); create table if not exists assets ( id text primary key, title text not null, asset_type text not null, lifecycle text not null, payload text not null ); create table if not exists representations ( id text primary key, asset_id text not null references assets(id) on delete cascade, kind text not null, digest text not null, payload text not null ); create table if not exists metadata_records ( id text primary key, asset_id text not null references assets(id) on delete cascade, key text not null, payload text not null ); create table if not exists metadata_schemas ( id text primary key, name text not null, version text not null, payload text not null ); create table if not exists metadata_schema_assignments ( id text primary key, schema_id text not null references metadata_schemas(id) on delete cascade, priority integer not null, payload text not null ); create table if not exists context_entities ( id text primary key, entity_type text not null, name text not null, payload text not null ); create table if not exists core_relationships ( id text primary key, source_id text not null references assets(id) on delete cascade, target_id text not null, target_kind text not null, predicate text not null, payload text not null ); create table if not exists asset_versions ( id text primary key, asset_id text not null references assets(id) on delete cascade, sequence integer not null, change_type text not null, payload text not null, unique(asset_id, sequence) ); create table if not exists audit_events ( id text primary key, target text not null, actor_id text not null, correlation_id text not null, outcome text not null, occurred_at text not null, payload text not null, foreign key(actor_id) references actors(id) ); create table if not exists retrieval_feedback ( id text primary key, label text not null, actor_id text not null, correlation_id text not null, created_at text not null, payload text not null, foreign key(actor_id) references actors(id) ); create table if not exists idempotency_records ( key text primary key, operation text not null, request_hash text not null, status text not null, payload text not null ); create table if not exists ingestion_jobs ( id text primary key, status text not null, actor_id text not null, correlation_id text not null, created_at text not null, updated_at text not null, payload text not null, foreign key(actor_id) references actors(id) ); create table if not exists transformation_runs ( id text primary key, operation_id text not null, status text not null, actor_id text not null, correlation_id text not null, queued_at text not null, updated_at text not null, payload text not null, foreign key(actor_id) references actors(id) ); create table if not exists derived_lineage ( id text primary key, output_asset_id text not null references assets(id) on delete cascade, transformation_run_id text not null references transformation_runs(id) on delete cascade, payload text not null ); create table if not exists workflow_templates ( id text not null, version text not null, name text not null, updated_at text not null, payload text not null, primary key(id, version) ); create table if not exists workflow_runs ( id text primary key, template_id text not null, template_version text not null, status text not null, actor_id text not null, correlation_id text not null, queued_at text not null, updated_at text not null, payload text not null, foreign key(actor_id) references actors(id), foreign key(template_id, template_version) references workflow_templates(id, version) ); create index if not exists idx_assets_lifecycle on assets(lifecycle); create index if not exists idx_representations_asset on representations(asset_id); create index if not exists idx_metadata_asset on metadata_records(asset_id); create index if not exists idx_schema_assignments_schema on metadata_schema_assignments(schema_id); create index if not exists idx_entities_type on context_entities(entity_type); create index if not exists idx_relationships_source on core_relationships(source_id); create index if not exists idx_relationships_target on core_relationships(target_id); create index if not exists idx_versions_asset on asset_versions(asset_id); create index if not exists idx_audit_target on audit_events(target); create index if not exists idx_audit_correlation on audit_events(correlation_id); create index if not exists idx_retrieval_feedback_label on retrieval_feedback(label); create index if not exists idx_retrieval_feedback_correlation on retrieval_feedback(correlation_id); create index if not exists idx_ingestion_jobs_status on ingestion_jobs(status); create index if not exists idx_ingestion_jobs_correlation on ingestion_jobs(correlation_id); create index if not exists idx_transformation_runs_status on transformation_runs(status); create index if not exists idx_transformation_runs_operation on transformation_runs(operation_id); create index if not exists idx_transformation_runs_correlation on transformation_runs(correlation_id); create index if not exists idx_derived_lineage_output on derived_lineage(output_asset_id); create index if not exists idx_derived_lineage_run on derived_lineage(transformation_run_id); create index if not exists idx_workflow_templates_name on workflow_templates(name); create index if not exists idx_workflow_runs_status on workflow_runs(status); create index if not exists idx_workflow_runs_template on workflow_runs(template_id); create index if not exists idx_workflow_runs_correlation on workflow_runs(correlation_id); """ ) def _connect(self) -> sqlite3.Connection: conn = sqlite3.connect(self.path) conn.row_factory = sqlite3.Row conn.execute("pragma foreign_keys = on") return conn def _one(self, query: str, params: tuple[Any, ...]) -> sqlite3.Row | None: with self._connect() as conn: return conn.execute(query, params).fetchone() def _all(self, query: str, params: tuple[Any, ...]) -> list[sqlite3.Row]: with self._connect() as conn: return list(conn.execute(query, params).fetchall()) def _metadata_records_for_assets(self, asset_ids: list[str]) -> dict[str, list[MetadataRecord]]: if not asset_ids: return {} placeholders = ",".join("?" for _ in asset_ids) rows = self._all( f""" select asset_id, payload from metadata_records where asset_id in ({placeholders}) order by asset_id, key, id """, tuple(asset_ids), ) records: dict[str, list[MetadataRecord]] = {} for row in rows: records.setdefault(row["asset_id"], []).append(MetadataRecord.from_dict(_loads(row["payload"]))) return records def _json(value: dict[str, Any]) -> str: return json.dumps(value, sort_keys=True, separators=(",", ":")) def _is_foreign_key_error(exc: sqlite3.IntegrityError) -> bool: return "FOREIGN KEY" in str(exc).upper() def _loads(value: str) -> dict[str, Any]: return json.loads(value) def _metadata_matches( records: list[MetadataRecord], metadata_filters: dict[str, Any], *, confirmed_metadata_only: bool, ) -> bool: for key, expected in metadata_filters.items(): candidates = [record for record in records if record.key == key] if confirmed_metadata_only: candidates = [record for record in candidates if record.confirmed] if not any(_metadata_value_matches(record.value, expected) for record in candidates): return False return True def _metadata_value_matches(value: Any, expected: Any) -> bool: if isinstance(value, list) and not isinstance(expected, list): return expected in value return value == expected