# Files
# kontextual-engine/src/kontextual_engine/adapters/sqlite/asset_registry.py
#
# 1089 lines
# 45 KiB
# Python

"""SQLite asset registry repository."""
from __future__ import annotations
import json
import sqlite3
from pathlib import Path
from typing import Any
from kontextual_engine.core import (
Actor,
AssetRepresentation,
AssetVersion,
AuditEvent,
ContextEntity,
CoreRelationship,
DerivedArtifactLineage,
IdempotencyRecord,
IngestionJob,
IngestionJobStatus,
KnowledgeAsset,
LifecycleState,
MetadataRecord,
MetadataSchema,
MetadataSchemaAssignment,
RepresentationKind,
RelationshipTargetKind,
RetrievalFeedbackRecord,
Sensitivity,
TransformationRun,
TransformationRunStatus,
WorkflowRun,
WorkflowRunStatus,
WorkflowTemplate,
)
from kontextual_engine.errors import NotFoundError, ValidationError
class SQLiteAssetRegistryRepository:
    """Asset registry repository persisted in a single SQLite database file.

    Each entity is stored as a canonical JSON ``payload`` column next to a
    handful of plain columns that are used for filtering and ordering. Every
    operation opens its own connection (with foreign keys enforced), runs in
    a transaction, and closes the connection when it is done.
    """

    # Maximum number of ids bound into one ``in (...)`` query. Kept well below
    # SQLite's bound-parameter limit so large id lists never fail with
    # "too many SQL variables".
    _MAX_IN_PARAMS = 500

    def __init__(self, path: str | Path) -> None:
        """Open the registry stored at *path*, creating it when missing.

        Missing parent directories are created, and the tables and indexes
        are installed with ``create ... if not exists`` statements.
        """
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self._initialize()

    def save_actor(self, actor: Actor) -> Actor:
        """Insert *actor*, or overwrite the row with the same id; return it."""
        self._write(
            """
            insert into actors (id, actor_type, payload)
            values (?, ?, ?)
            on conflict(id) do update set
                actor_type=excluded.actor_type,
                payload=excluded.payload
            """,
            (actor.id, actor.actor_type.value, _json(actor.to_dict())),
        )
        return actor

    def get_actor(self, actor_id: str) -> Actor:
        """Return the actor stored under *actor_id*.

        Raises:
            NotFoundError: If no actor has that id.
        """
        row = self._one("select payload from actors where id = ?", (actor_id,))
        if row is None:
            raise NotFoundError("Actor not found", details={"actor_id": actor_id})
        return Actor.from_dict(_loads(row["payload"]))

    def save_asset(self, asset: KnowledgeAsset) -> KnowledgeAsset:
        """Insert *asset*, or overwrite the row with the same id; return it."""
        self._write(
            """
            insert into assets (id, title, asset_type, lifecycle, payload)
            values (?, ?, ?, ?, ?)
            on conflict(id) do update set
                title=excluded.title,
                asset_type=excluded.asset_type,
                lifecycle=excluded.lifecycle,
                payload=excluded.payload
            """,
            (
                asset.id,
                asset.title,
                asset.classification.asset_type,
                asset.lifecycle.value,
                _json(asset.to_dict()),
            ),
        )
        return asset

    def get_asset(self, asset_id: str) -> KnowledgeAsset:
        """Return the asset stored under *asset_id*.

        Raises:
            NotFoundError: If no asset has that id.
        """
        row = self._one("select payload from assets where id = ?", (asset_id,))
        if row is None:
            raise NotFoundError("Asset not found", details={"asset_id": asset_id})
        return KnowledgeAsset.from_dict(_loads(row["payload"]))

    def list_assets(
        self,
        *,
        lifecycle: LifecycleState | None = None,
        asset_type: str | None = None,
        sensitivity: Sensitivity | str | None = None,
        owner: str | None = None,
        topic: str | None = None,
        review_state: str | None = None,
        metadata_filters: dict[str, Any] | None = None,
        confirmed_metadata_only: bool = False,
    ) -> list[KnowledgeAsset]:
        """Return assets matching every supplied filter, ordered by title, id.

        ``lifecycle`` and ``asset_type`` are applied in SQL. The remaining
        filters are applied in Python on the decoded assets:
        ``sensitivity`` (coerced through ``Sensitivity(...)``), ``owner``,
        ``topic`` (membership in the asset's topics) and ``review_state``.
        ``metadata_filters`` keeps only assets whose metadata records satisfy
        every key/value pair; with ``confirmed_metadata_only`` only confirmed
        records are considered.
        """
        where, params = self._where(
            {
                "lifecycle": None if lifecycle is None else lifecycle.value,
                "asset_type": asset_type,
            }
        )
        rows = self._all(f"select payload from assets{where} order by title, id", params)
        assets = [KnowledgeAsset.from_dict(_loads(row["payload"])) for row in rows]
        if sensitivity is not None:
            sensitivity = Sensitivity(sensitivity)
            assets = [asset for asset in assets if asset.classification.sensitivity == sensitivity]
        if owner is not None:
            assets = [asset for asset in assets if asset.classification.owner == owner]
        if topic is not None:
            assets = [asset for asset in assets if topic in asset.classification.topics]
        if review_state is not None:
            assets = [asset for asset in assets if asset.classification.review_state == review_state]
        if metadata_filters:
            records_by_asset = self._metadata_records_for_assets([asset.id for asset in assets])
            assets = [
                asset
                for asset in assets
                if _metadata_matches(
                    records_by_asset.get(asset.id, []),
                    metadata_filters,
                    confirmed_metadata_only=confirmed_metadata_only,
                )
            ]
        return assets

    def save_representation(self, representation: AssetRepresentation) -> AssetRepresentation:
        """Insert or overwrite *representation* by id and return it.

        Raises:
            ValidationError: If the database rejects the row with an
                integrity error.
        """
        try:
            self._write(
                """
                insert into representations (id, asset_id, kind, digest, payload)
                values (?, ?, ?, ?, ?)
                on conflict(id) do update set
                    asset_id=excluded.asset_id,
                    kind=excluded.kind,
                    digest=excluded.digest,
                    payload=excluded.payload
                """,
                (
                    representation.representation_id,
                    representation.asset_id,
                    representation.kind.value,
                    representation.digest,
                    _json(representation.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            # NOTE: every integrity failure is reported as an unknown asset;
            # kept as-is so callers keep receiving ValidationError.
            raise ValidationError(
                "Representation references an unknown asset",
                details={
                    "asset_id": representation.asset_id,
                    "representation_id": representation.representation_id,
                },
            ) from exc
        return representation

    def get_representation(self, representation_id: str) -> AssetRepresentation:
        """Return the representation stored under *representation_id*.

        Raises:
            NotFoundError: If no representation has that id.
        """
        row = self._one("select payload from representations where id = ?", (representation_id,))
        if row is None:
            raise NotFoundError(
                "Representation not found",
                details={"representation_id": representation_id},
            )
        return AssetRepresentation.from_dict(_loads(row["payload"]))

    def list_representations(
        self,
        *,
        asset_id: str | None = None,
        kind: RepresentationKind | None = None,
    ) -> list[AssetRepresentation]:
        """Return representations filtered by asset and/or kind.

        Results are ordered by asset id, kind, then id.
        """
        where, params = self._where(
            {
                "asset_id": asset_id,
                "kind": None if kind is None else kind.value,
            }
        )
        rows = self._all(
            f"select payload from representations{where} order by asset_id, kind, id",
            params,
        )
        return [AssetRepresentation.from_dict(_loads(row["payload"])) for row in rows]

    def save_metadata_record(self, asset_id: str, record: MetadataRecord) -> MetadataRecord:
        """Insert or overwrite *record* (by record id) for *asset_id*.

        Raises:
            ValidationError: If the database rejects the row with an
                integrity error.
        """
        try:
            self._write(
                """
                insert into metadata_records (id, asset_id, key, payload)
                values (?, ?, ?, ?)
                on conflict(id) do update set
                    asset_id=excluded.asset_id,
                    key=excluded.key,
                    payload=excluded.payload
                """,
                (record.record_id, asset_id, record.key, _json(record.to_dict())),
            )
        except sqlite3.IntegrityError as exc:
            # NOTE: every integrity failure is reported as an unknown asset;
            # kept as-is so callers keep receiving ValidationError.
            raise ValidationError(
                "Metadata record references an unknown asset",
                details={"asset_id": asset_id, "record_id": record.record_id},
            ) from exc
        return record

    def list_metadata_records(self, asset_id: str) -> list[MetadataRecord]:
        """Return the metadata records of *asset_id*, ordered by key, id.

        Raises:
            NotFoundError: If there are no records and the asset itself does
                not exist.
        """
        rows = self._all(
            "select payload from metadata_records where asset_id = ? order by key, id",
            (asset_id,),
        )
        if not rows:
            # Tell "asset without records" apart from "unknown asset".
            self.get_asset(asset_id)
        return [MetadataRecord.from_dict(_loads(row["payload"])) for row in rows]

    def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema:
        """Insert *schema*, or overwrite the row with the same id; return it."""
        self._write(
            """
            insert into metadata_schemas (id, name, version, payload)
            values (?, ?, ?, ?)
            on conflict(id) do update set
                name=excluded.name,
                version=excluded.version,
                payload=excluded.payload
            """,
            (schema.schema_id, schema.name, schema.version, _json(schema.to_dict())),
        )
        return schema

    def get_metadata_schema(self, schema_id: str) -> MetadataSchema:
        """Return the metadata schema stored under *schema_id*.

        Raises:
            NotFoundError: If no schema has that id.
        """
        row = self._one("select payload from metadata_schemas where id = ?", (schema_id,))
        if row is None:
            raise NotFoundError("Metadata schema not found", details={"schema_id": schema_id})
        return MetadataSchema.from_dict(_loads(row["payload"]))

    def list_metadata_schemas(self) -> list[MetadataSchema]:
        """Return every metadata schema, ordered by name, id."""
        rows = self._all("select payload from metadata_schemas order by name, id", ())
        return [MetadataSchema.from_dict(_loads(row["payload"])) for row in rows]

    def save_metadata_schema_assignment(
        self,
        assignment: MetadataSchemaAssignment,
    ) -> MetadataSchemaAssignment:
        """Insert or overwrite *assignment* by id and return it.

        Raises:
            NotFoundError: If the referenced schema does not exist.
        """
        # Look the schema up first so a dangling reference surfaces as
        # NotFoundError rather than a raw integrity error.
        self.get_metadata_schema(assignment.schema_id)
        self._write(
            """
            insert into metadata_schema_assignments (id, schema_id, priority, payload)
            values (?, ?, ?, ?)
            on conflict(id) do update set
                schema_id=excluded.schema_id,
                priority=excluded.priority,
                payload=excluded.payload
            """,
            (
                assignment.assignment_id,
                assignment.schema_id,
                assignment.priority,
                _json(assignment.to_dict()),
            ),
        )
        return assignment

    def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment:
        """Return the schema assignment stored under *assignment_id*.

        Raises:
            NotFoundError: If no assignment has that id.
        """
        row = self._one(
            "select payload from metadata_schema_assignments where id = ?",
            (assignment_id,),
        )
        if row is None:
            raise NotFoundError(
                "Metadata schema assignment not found",
                details={"assignment_id": assignment_id},
            )
        return MetadataSchemaAssignment.from_dict(_loads(row["payload"]))

    def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]:
        """Return every schema assignment, ordered by priority, schema id, id."""
        rows = self._all(
            "select payload from metadata_schema_assignments order by priority, schema_id, id",
            (),
        )
        return [MetadataSchemaAssignment.from_dict(_loads(row["payload"])) for row in rows]

    def save_context_entity(self, entity: ContextEntity) -> ContextEntity:
        """Insert *entity*, or overwrite the row with the same id; return it."""
        self._write(
            """
            insert into context_entities (id, entity_type, name, payload)
            values (?, ?, ?, ?)
            on conflict(id) do update set
                entity_type=excluded.entity_type,
                name=excluded.name,
                payload=excluded.payload
            """,
            (entity.entity_id, entity.entity_type.value, entity.name, _json(entity.to_dict())),
        )
        return entity

    def get_context_entity(self, entity_id: str) -> ContextEntity:
        """Return the context entity stored under *entity_id*.

        Raises:
            NotFoundError: If no context entity has that id.
        """
        row = self._one("select payload from context_entities where id = ?", (entity_id,))
        if row is None:
            raise NotFoundError("Context entity not found", details={"entity_id": entity_id})
        return ContextEntity.from_dict(_loads(row["payload"]))

    def list_context_entities(self) -> list[ContextEntity]:
        """Return every context entity, ordered by entity type, name, id."""
        rows = self._all("select payload from context_entities order by entity_type, name, id", ())
        return [ContextEntity.from_dict(_loads(row["payload"])) for row in rows]

    def save_relationship(self, relationship: CoreRelationship) -> CoreRelationship:
        """Insert or overwrite *relationship* by id and return it.

        The source must be an existing asset. The target must be an existing
        asset when ``target_kind`` is ``RelationshipTargetKind.ASSET`` and an
        existing context entity otherwise.

        Raises:
            NotFoundError: If the source or the target does not exist.
        """
        self.get_asset(relationship.source_id)
        if relationship.target_kind == RelationshipTargetKind.ASSET:
            self.get_asset(relationship.target_id)
        else:
            self.get_context_entity(relationship.target_id)
        self._write(
            """
            insert into core_relationships (id, source_id, target_id, target_kind, predicate, payload)
            values (?, ?, ?, ?, ?, ?)
            on conflict(id) do update set
                source_id=excluded.source_id,
                target_id=excluded.target_id,
                target_kind=excluded.target_kind,
                predicate=excluded.predicate,
                payload=excluded.payload
            """,
            (
                relationship.relationship_id,
                relationship.source_id,
                relationship.target_id,
                relationship.target_kind.value,
                relationship.predicate,
                _json(relationship.to_dict()),
            ),
        )
        return relationship

    def get_relationship(self, relationship_id: str) -> CoreRelationship:
        """Return the relationship stored under *relationship_id*.

        Raises:
            NotFoundError: If no relationship has that id.
        """
        row = self._one("select payload from core_relationships where id = ?", (relationship_id,))
        if row is None:
            raise NotFoundError(
                "Relationship not found",
                details={"relationship_id": relationship_id},
            )
        return CoreRelationship.from_dict(_loads(row["payload"]))

    def list_relationships(
        self,
        *,
        source_id: str | None = None,
        target_id: str | None = None,
    ) -> list[CoreRelationship]:
        """Return relationships filtered by source and/or target id.

        Results are ordered by source id, target id, predicate, then id.
        """
        where, params = self._where({"source_id": source_id, "target_id": target_id})
        rows = self._all(
            f"select payload from core_relationships{where} order by source_id, target_id, predicate, id",
            params,
        )
        return [CoreRelationship.from_dict(_loads(row["payload"])) for row in rows]

    def save_version(self, version: AssetVersion) -> AssetVersion:
        """Insert *version* and return it; existing rows are never overwritten.

        Raises:
            ValidationError: If the asset is unknown (foreign-key failure) or
                the insert violates another integrity constraint, which is
                reported as a duplicate sequence for the asset.
        """
        try:
            self._write(
                """
                insert into asset_versions (id, asset_id, sequence, change_type, payload)
                values (?, ?, ?, ?, ?)
                """,
                (
                    version.version_id,
                    version.asset_id,
                    version.sequence,
                    version.change_type.value,
                    _json(version.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            if _is_foreign_key_error(exc):
                raise ValidationError(
                    "Version references an unknown asset",
                    details={"asset_id": version.asset_id, "version_id": version.version_id},
                ) from exc
            raise ValidationError(
                "Version sequence already exists for asset",
                details={"asset_id": version.asset_id, "sequence": version.sequence},
            ) from exc
        return version

    def list_versions(self, asset_id: str) -> list[AssetVersion]:
        """Return the versions of *asset_id*, ordered by sequence.

        Raises:
            NotFoundError: If there are no versions and the asset itself does
                not exist.
        """
        rows = self._all(
            "select payload from asset_versions where asset_id = ? order by sequence",
            (asset_id,),
        )
        if not rows:
            # Tell "asset without versions" apart from "unknown asset".
            self.get_asset(asset_id)
        return [AssetVersion.from_dict(_loads(row["payload"])) for row in rows]

    def save_audit_event(self, event: AuditEvent) -> AuditEvent:
        """Insert or overwrite *event* by id and return it.

        Raises:
            ValidationError: If the event references an unknown actor.
            sqlite3.IntegrityError: For any other integrity failure.
        """
        try:
            self._write(
                """
                insert into audit_events (id, target, actor_id, correlation_id, outcome, occurred_at, payload)
                values (?, ?, ?, ?, ?, ?, ?)
                on conflict(id) do update set
                    target=excluded.target,
                    actor_id=excluded.actor_id,
                    correlation_id=excluded.correlation_id,
                    outcome=excluded.outcome,
                    occurred_at=excluded.occurred_at,
                    payload=excluded.payload
                """,
                (
                    event.event_id,
                    event.target,
                    event.actor_id,
                    event.correlation_id,
                    event.outcome.value,
                    event.occurred_at,
                    _json(event.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            if _is_foreign_key_error(exc):
                raise ValidationError(
                    "Audit event references an unknown actor",
                    details={"actor_id": event.actor_id, "event_id": event.event_id},
                ) from exc
            raise
        return event

    def get_audit_event(self, event_id: str) -> AuditEvent:
        """Return the audit event stored under *event_id*.

        Raises:
            NotFoundError: If no audit event has that id.
        """
        row = self._one("select payload from audit_events where id = ?", (event_id,))
        if row is None:
            raise NotFoundError("Audit event not found", details={"event_id": event_id})
        return AuditEvent.from_dict(_loads(row["payload"]))

    def list_audit_events(
        self,
        *,
        target: str | None = None,
        correlation_id: str | None = None,
    ) -> list[AuditEvent]:
        """Return audit events filtered by target and/or correlation id.

        Results are ordered by ``occurred_at`` and then by rowid.
        """
        where, params = self._where({"target": target, "correlation_id": correlation_id})
        rows = self._all(f"select payload from audit_events{where} order by occurred_at, rowid", params)
        return [AuditEvent.from_dict(_loads(row["payload"])) for row in rows]

    def save_retrieval_feedback(self, record: RetrievalFeedbackRecord) -> RetrievalFeedbackRecord:
        """Insert or overwrite *record* by feedback id and return it.

        Raises:
            ValidationError: If the record references an unknown actor.
            sqlite3.IntegrityError: For any other integrity failure.
        """
        try:
            self._write(
                """
                insert into retrieval_feedback (id, label, actor_id, correlation_id, created_at, payload)
                values (?, ?, ?, ?, ?, ?)
                on conflict(id) do update set
                    label=excluded.label,
                    actor_id=excluded.actor_id,
                    correlation_id=excluded.correlation_id,
                    created_at=excluded.created_at,
                    payload=excluded.payload
                """,
                (
                    record.feedback_id,
                    record.label.value,
                    record.actor_id,
                    record.correlation_id,
                    record.created_at,
                    _json(record.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            if _is_foreign_key_error(exc):
                raise ValidationError(
                    "Retrieval feedback references an unknown actor",
                    details={"actor_id": record.actor_id, "feedback_id": record.feedback_id},
                ) from exc
            raise
        return record

    def list_retrieval_feedback(
        self,
        *,
        correlation_id: str | None = None,
        label: str | None = None,
    ) -> list[RetrievalFeedbackRecord]:
        """Return feedback records filtered by correlation id and/or label.

        ``label`` is compared against the stored label column as given.
        Results are ordered by ``created_at`` and then id.
        """
        where, params = self._where({"correlation_id": correlation_id, "label": label})
        rows = self._all(f"select payload from retrieval_feedback{where} order by created_at, id", params)
        return [RetrievalFeedbackRecord.from_dict(_loads(row["payload"])) for row in rows]

    def save_idempotency_record(self, record: IdempotencyRecord) -> IdempotencyRecord:
        """Insert *record*, or overwrite the row with the same key; return it."""
        self._write(
            """
            insert into idempotency_records (key, operation, request_hash, status, payload)
            values (?, ?, ?, ?, ?)
            on conflict(key) do update set
                operation=excluded.operation,
                request_hash=excluded.request_hash,
                status=excluded.status,
                payload=excluded.payload
            """,
            (
                record.key,
                record.operation,
                record.request_hash,
                record.status.value,
                _json(record.to_dict()),
            ),
        )
        return record

    def get_idempotency_record(self, key: str) -> IdempotencyRecord | None:
        """Return the idempotency record for *key*, or ``None`` if absent."""
        row = self._one("select payload from idempotency_records where key = ?", (key,))
        if row is None:
            return None
        return IdempotencyRecord.from_dict(_loads(row["payload"]))

    def save_ingestion_job(self, job: IngestionJob) -> IngestionJob:
        """Insert or overwrite *job* by job id and return it.

        On update the ``created_at`` column keeps its originally stored
        value; every other column, including the payload, is replaced.

        Raises:
            ValidationError: If the job references an unknown actor.
            sqlite3.IntegrityError: For any other integrity failure.
        """
        try:
            self._write(
                """
                insert into ingestion_jobs (id, status, actor_id, correlation_id, created_at, updated_at, payload)
                values (?, ?, ?, ?, ?, ?, ?)
                on conflict(id) do update set
                    status=excluded.status,
                    actor_id=excluded.actor_id,
                    correlation_id=excluded.correlation_id,
                    updated_at=excluded.updated_at,
                    payload=excluded.payload
                """,
                (
                    job.job_id,
                    job.status.value,
                    job.actor_id,
                    job.correlation_id,
                    job.created_at,
                    job.updated_at,
                    _json(job.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            if _is_foreign_key_error(exc):
                raise ValidationError(
                    "Ingestion job references an unknown actor",
                    details={"actor_id": job.actor_id, "job_id": job.job_id},
                ) from exc
            raise
        return job

    def get_ingestion_job(self, job_id: str) -> IngestionJob:
        """Return the ingestion job stored under *job_id*.

        Raises:
            NotFoundError: If no ingestion job has that id.
        """
        row = self._one("select payload from ingestion_jobs where id = ?", (job_id,))
        if row is None:
            raise NotFoundError("Ingestion job not found", details={"job_id": job_id})
        return IngestionJob.from_dict(_loads(row["payload"]))

    def list_ingestion_jobs(
        self,
        *,
        status: IngestionJobStatus | None = None,
    ) -> list[IngestionJob]:
        """Return ingestion jobs, optionally restricted to *status*.

        Results are ordered by ``created_at`` and then id.
        """
        where, params = self._where({"status": None if status is None else status.value})
        rows = self._all(f"select payload from ingestion_jobs{where} order by created_at, id", params)
        return [IngestionJob.from_dict(_loads(row["payload"])) for row in rows]

    def save_transformation_run(self, run: TransformationRun) -> TransformationRun:
        """Insert or overwrite *run* by run id and return it.

        Raises:
            ValidationError: If the run references an unknown actor.
            sqlite3.IntegrityError: For any other integrity failure.
        """
        try:
            self._write(
                """
                insert into transformation_runs
                    (id, operation_id, status, actor_id, correlation_id, queued_at, updated_at, payload)
                values (?, ?, ?, ?, ?, ?, ?, ?)
                on conflict(id) do update set
                    operation_id=excluded.operation_id,
                    status=excluded.status,
                    actor_id=excluded.actor_id,
                    correlation_id=excluded.correlation_id,
                    queued_at=excluded.queued_at,
                    updated_at=excluded.updated_at,
                    payload=excluded.payload
                """,
                (
                    run.run_id,
                    run.operation_id,
                    run.status.value,
                    run.actor_id,
                    run.correlation_id,
                    run.queued_at,
                    run.updated_at,
                    _json(run.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            if _is_foreign_key_error(exc):
                raise ValidationError(
                    "Transformation run references an unknown actor",
                    details={"actor_id": run.actor_id, "run_id": run.run_id},
                ) from exc
            raise
        return run

    def get_transformation_run(self, run_id: str) -> TransformationRun:
        """Return the transformation run stored under *run_id*.

        Raises:
            NotFoundError: If no transformation run has that id.
        """
        row = self._one("select payload from transformation_runs where id = ?", (run_id,))
        if row is None:
            raise NotFoundError("Transformation run not found", details={"run_id": run_id})
        return TransformationRun.from_dict(_loads(row["payload"]))

    def list_transformation_runs(
        self,
        *,
        status: TransformationRunStatus | None = None,
        operation_id: str | None = None,
    ) -> list[TransformationRun]:
        """Return transformation runs filtered by status and/or operation.

        Results are ordered by ``queued_at`` and then id.
        """
        where, params = self._where(
            {
                "status": None if status is None else status.value,
                "operation_id": operation_id,
            }
        )
        rows = self._all(f"select payload from transformation_runs{where} order by queued_at, id", params)
        return [TransformationRun.from_dict(_loads(row["payload"])) for row in rows]

    def save_derived_lineage(self, lineage: DerivedArtifactLineage) -> DerivedArtifactLineage:
        """Insert or overwrite *lineage* by lineage id and return it.

        Raises:
            ValidationError: If the output asset or the transformation run
                is unknown (foreign-key failure).
            sqlite3.IntegrityError: For any other integrity failure.
        """
        try:
            self._write(
                """
                insert into derived_lineage
                    (id, output_asset_id, transformation_run_id, payload)
                values (?, ?, ?, ?)
                on conflict(id) do update set
                    output_asset_id=excluded.output_asset_id,
                    transformation_run_id=excluded.transformation_run_id,
                    payload=excluded.payload
                """,
                (
                    lineage.lineage_id,
                    lineage.output_asset_id,
                    lineage.transformation_run_id,
                    _json(lineage.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            if _is_foreign_key_error(exc):
                raise ValidationError(
                    "Derived lineage references an unknown output asset or transformation run",
                    details={
                        "output_asset_id": lineage.output_asset_id,
                        "transformation_run_id": lineage.transformation_run_id,
                        "lineage_id": lineage.lineage_id,
                    },
                ) from exc
            raise
        return lineage

    def get_derived_lineage(self, lineage_id: str) -> DerivedArtifactLineage:
        """Return the derived lineage stored under *lineage_id*.

        Raises:
            NotFoundError: If no lineage record has that id.
        """
        row = self._one("select payload from derived_lineage where id = ?", (lineage_id,))
        if row is None:
            raise NotFoundError("Derived lineage not found", details={"lineage_id": lineage_id})
        return DerivedArtifactLineage.from_dict(_loads(row["payload"]))

    def list_derived_lineage(
        self,
        *,
        output_asset_id: str | None = None,
        source_asset_id: str | None = None,
        transformation_run_id: str | None = None,
    ) -> list[DerivedArtifactLineage]:
        """Return lineage records matching every supplied filter.

        ``output_asset_id`` and ``transformation_run_id`` are applied in SQL.
        ``source_asset_id`` is applied in Python against each record's
        ``source_asset_ids``. Results are ordered by transformation run id,
        then id.
        """
        where, params = self._where(
            {
                "output_asset_id": output_asset_id,
                "transformation_run_id": transformation_run_id,
            }
        )
        rows = self._all(
            f"select payload from derived_lineage{where} order by transformation_run_id, id",
            params,
        )
        records = [DerivedArtifactLineage.from_dict(_loads(row["payload"])) for row in rows]
        if source_asset_id is not None:
            records = [record for record in records if source_asset_id in record.source_asset_ids]
        return records

    def save_workflow_template(self, template: WorkflowTemplate) -> WorkflowTemplate:
        """Insert or overwrite *template* by ``(id, version)`` and return it."""
        self._write(
            """
            insert into workflow_templates
                (id, version, name, updated_at, payload)
            values (?, ?, ?, ?, ?)
            on conflict(id, version) do update set
                name=excluded.name,
                updated_at=excluded.updated_at,
                payload=excluded.payload
            """,
            (
                template.template_id,
                template.version,
                template.name,
                template.updated_at,
                _json(template.to_dict()),
            ),
        )
        return template

    def get_workflow_template(
        self,
        template_id: str,
        *,
        version: str | None = None,
    ) -> WorkflowTemplate:
        """Return a workflow template by id and, optionally, version.

        Without *version*, the row that sorts first by ``updated_at``
        descending and then ``version`` descending is returned.

        Raises:
            NotFoundError: If no matching template exists.
        """
        if version is None:
            row = self._one(
                """
                select payload from workflow_templates
                where id = ?
                order by updated_at desc, version desc
                limit 1
                """,
                (template_id,),
            )
        else:
            row = self._one(
                "select payload from workflow_templates where id = ? and version = ?",
                (template_id, version),
            )
        if row is None:
            details = {"template_id": template_id}
            if version is not None:
                details["version"] = version
            raise NotFoundError("Workflow template not found", details=details)
        return WorkflowTemplate.from_dict(_loads(row["payload"]))

    def list_workflow_templates(
        self,
        *,
        template_id: str | None = None,
    ) -> list[WorkflowTemplate]:
        """Return workflow templates, optionally only those of *template_id*.

        Results are ordered by name, version, then id.
        """
        where, params = self._where({"id": template_id})
        rows = self._all(f"select payload from workflow_templates{where} order by name, version, id", params)
        return [WorkflowTemplate.from_dict(_loads(row["payload"])) for row in rows]

    def save_workflow_run(self, run: WorkflowRun) -> WorkflowRun:
        """Insert or overwrite *run* by run id and return it.

        Raises:
            ValidationError: If the run references an unknown actor or an
                unknown template version (foreign-key failure).
            sqlite3.IntegrityError: For any other integrity failure.
        """
        try:
            self._write(
                """
                insert into workflow_runs
                    (id, template_id, template_version, status, actor_id, correlation_id,
                     queued_at, updated_at, payload)
                values (?, ?, ?, ?, ?, ?, ?, ?, ?)
                on conflict(id) do update set
                    template_id=excluded.template_id,
                    template_version=excluded.template_version,
                    status=excluded.status,
                    actor_id=excluded.actor_id,
                    correlation_id=excluded.correlation_id,
                    queued_at=excluded.queued_at,
                    updated_at=excluded.updated_at,
                    payload=excluded.payload
                """,
                (
                    run.run_id,
                    run.template_id,
                    run.template_version,
                    run.status.value,
                    run.actor_id,
                    run.correlation_id,
                    run.queued_at,
                    run.updated_at,
                    _json(run.to_dict()),
                ),
            )
        except sqlite3.IntegrityError as exc:
            if _is_foreign_key_error(exc):
                raise ValidationError(
                    "Workflow run references an unknown actor or template",
                    details={
                        "actor_id": run.actor_id,
                        "template_id": run.template_id,
                        "template_version": run.template_version,
                        "run_id": run.run_id,
                    },
                ) from exc
            raise
        return run

    def get_workflow_run(self, run_id: str) -> WorkflowRun:
        """Return the workflow run stored under *run_id*.

        Raises:
            NotFoundError: If no workflow run has that id.
        """
        row = self._one("select payload from workflow_runs where id = ?", (run_id,))
        if row is None:
            raise NotFoundError("Workflow run not found", details={"run_id": run_id})
        return WorkflowRun.from_dict(_loads(row["payload"]))

    def list_workflow_runs(
        self,
        *,
        status: WorkflowRunStatus | None = None,
        template_id: str | None = None,
    ) -> list[WorkflowRun]:
        """Return workflow runs filtered by status and/or template id.

        Results are ordered by ``queued_at`` and then id.
        """
        where, params = self._where(
            {
                "status": None if status is None else status.value,
                "template_id": template_id,
            }
        )
        rows = self._all(f"select payload from workflow_runs{where} order by queued_at, id", params)
        return [WorkflowRun.from_dict(_loads(row["payload"])) for row in rows]

    def _initialize(self) -> None:
        """Create all tables and indexes if they do not exist yet."""
        conn = self._connect()
        try:
            with conn:
                conn.executescript(
                    """
                    create table if not exists actors (
                        id text primary key,
                        actor_type text not null,
                        payload text not null
                    );
                    create table if not exists assets (
                        id text primary key,
                        title text not null,
                        asset_type text not null,
                        lifecycle text not null,
                        payload text not null
                    );
                    create table if not exists representations (
                        id text primary key,
                        asset_id text not null references assets(id) on delete cascade,
                        kind text not null,
                        digest text not null,
                        payload text not null
                    );
                    create table if not exists metadata_records (
                        id text primary key,
                        asset_id text not null references assets(id) on delete cascade,
                        key text not null,
                        payload text not null
                    );
                    create table if not exists metadata_schemas (
                        id text primary key,
                        name text not null,
                        version text not null,
                        payload text not null
                    );
                    create table if not exists metadata_schema_assignments (
                        id text primary key,
                        schema_id text not null references metadata_schemas(id) on delete cascade,
                        priority integer not null,
                        payload text not null
                    );
                    create table if not exists context_entities (
                        id text primary key,
                        entity_type text not null,
                        name text not null,
                        payload text not null
                    );
                    create table if not exists core_relationships (
                        id text primary key,
                        source_id text not null references assets(id) on delete cascade,
                        target_id text not null,
                        target_kind text not null,
                        predicate text not null,
                        payload text not null
                    );
                    create table if not exists asset_versions (
                        id text primary key,
                        asset_id text not null references assets(id) on delete cascade,
                        sequence integer not null,
                        change_type text not null,
                        payload text not null,
                        unique(asset_id, sequence)
                    );
                    create table if not exists audit_events (
                        id text primary key,
                        target text not null,
                        actor_id text not null,
                        correlation_id text not null,
                        outcome text not null,
                        occurred_at text not null,
                        payload text not null,
                        foreign key(actor_id) references actors(id)
                    );
                    create table if not exists retrieval_feedback (
                        id text primary key,
                        label text not null,
                        actor_id text not null,
                        correlation_id text not null,
                        created_at text not null,
                        payload text not null,
                        foreign key(actor_id) references actors(id)
                    );
                    create table if not exists idempotency_records (
                        key text primary key,
                        operation text not null,
                        request_hash text not null,
                        status text not null,
                        payload text not null
                    );
                    create table if not exists ingestion_jobs (
                        id text primary key,
                        status text not null,
                        actor_id text not null,
                        correlation_id text not null,
                        created_at text not null,
                        updated_at text not null,
                        payload text not null,
                        foreign key(actor_id) references actors(id)
                    );
                    create table if not exists transformation_runs (
                        id text primary key,
                        operation_id text not null,
                        status text not null,
                        actor_id text not null,
                        correlation_id text not null,
                        queued_at text not null,
                        updated_at text not null,
                        payload text not null,
                        foreign key(actor_id) references actors(id)
                    );
                    create table if not exists derived_lineage (
                        id text primary key,
                        output_asset_id text not null references assets(id) on delete cascade,
                        transformation_run_id text not null references transformation_runs(id) on delete cascade,
                        payload text not null
                    );
                    create table if not exists workflow_templates (
                        id text not null,
                        version text not null,
                        name text not null,
                        updated_at text not null,
                        payload text not null,
                        primary key(id, version)
                    );
                    create table if not exists workflow_runs (
                        id text primary key,
                        template_id text not null,
                        template_version text not null,
                        status text not null,
                        actor_id text not null,
                        correlation_id text not null,
                        queued_at text not null,
                        updated_at text not null,
                        payload text not null,
                        foreign key(actor_id) references actors(id),
                        foreign key(template_id, template_version)
                            references workflow_templates(id, version)
                    );
                    create index if not exists idx_assets_lifecycle on assets(lifecycle);
                    create index if not exists idx_representations_asset on representations(asset_id);
                    create index if not exists idx_metadata_asset on metadata_records(asset_id);
                    create index if not exists idx_schema_assignments_schema on metadata_schema_assignments(schema_id);
                    create index if not exists idx_entities_type on context_entities(entity_type);
                    create index if not exists idx_relationships_source on core_relationships(source_id);
                    create index if not exists idx_relationships_target on core_relationships(target_id);
                    create index if not exists idx_versions_asset on asset_versions(asset_id);
                    create index if not exists idx_audit_target on audit_events(target);
                    create index if not exists idx_audit_correlation on audit_events(correlation_id);
                    create index if not exists idx_retrieval_feedback_label on retrieval_feedback(label);
                    create index if not exists idx_retrieval_feedback_correlation on retrieval_feedback(correlation_id);
                    create index if not exists idx_ingestion_jobs_status on ingestion_jobs(status);
                    create index if not exists idx_ingestion_jobs_correlation on ingestion_jobs(correlation_id);
                    create index if not exists idx_transformation_runs_status on transformation_runs(status);
                    create index if not exists idx_transformation_runs_operation on transformation_runs(operation_id);
                    create index if not exists idx_transformation_runs_correlation on transformation_runs(correlation_id);
                    create index if not exists idx_derived_lineage_output on derived_lineage(output_asset_id);
                    create index if not exists idx_derived_lineage_run on derived_lineage(transformation_run_id);
                    create index if not exists idx_workflow_templates_name on workflow_templates(name);
                    create index if not exists idx_workflow_runs_status on workflow_runs(status);
                    create index if not exists idx_workflow_runs_template on workflow_runs(template_id);
                    create index if not exists idx_workflow_runs_correlation on workflow_runs(correlation_id);
                    """
                )
        finally:
            conn.close()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection with row access by name and foreign keys on.

        The caller owns the returned connection and must close it.
        """
        conn = sqlite3.connect(self.path)
        conn.row_factory = sqlite3.Row
        conn.execute("pragma foreign_keys = on")
        return conn

    def _one(self, query: str, params: tuple[Any, ...]) -> sqlite3.Row | None:
        """Run *query* and return its first row, or ``None`` if there is none."""
        conn = self._connect()
        try:
            # ``with conn`` only commits or rolls back; the connection is
            # closed explicitly in ``finally``.
            with conn:
                return conn.execute(query, params).fetchone()
        finally:
            conn.close()

    def _all(self, query: str, params: tuple[Any, ...]) -> list[sqlite3.Row]:
        """Run *query* and return all of its rows as a list."""
        conn = self._connect()
        try:
            with conn:
                return list(conn.execute(query, params).fetchall())
        finally:
            conn.close()

    def _write(self, query: str, params: tuple[Any, ...]) -> None:
        """Run one statement in its own transaction, then close the connection.

        The transaction is committed on success and rolled back if the
        statement raises; the exception then propagates to the caller.
        """
        conn = self._connect()
        try:
            with conn:
                conn.execute(query, params)
        finally:
            conn.close()

    @staticmethod
    def _where(filters: dict[str, Any]) -> tuple[str, tuple[Any, ...]]:
        """Build a ``where`` clause from column/value equality filters.

        Entries whose value is ``None`` are skipped. Returns the clause
        (with a leading space, or ``""`` when nothing is active) and the
        matching parameter tuple, in the order the filters were given.
        """
        active = {column: value for column, value in filters.items() if value is not None}
        if not active:
            return "", ()
        clause = " and ".join(f"{column} = ?" for column in active)
        return f" where {clause}", tuple(active.values())

    def _metadata_records_for_assets(self, asset_ids: list[str]) -> dict[str, list[MetadataRecord]]:
        """Return metadata records grouped by asset id for *asset_ids*.

        Ids are de-duplicated and queried in chunks of ``_MAX_IN_PARAMS`` so
        the number of bound parameters per statement stays bounded. Within
        each asset, records are ordered by key, then id. Assets without
        records are absent from the result.
        """
        unique_ids = list(dict.fromkeys(asset_ids))
        if not unique_ids:
            return {}
        records: dict[str, list[MetadataRecord]] = {}
        conn = self._connect()
        try:
            for start in range(0, len(unique_ids), self._MAX_IN_PARAMS):
                chunk = unique_ids[start : start + self._MAX_IN_PARAMS]
                placeholders = ",".join("?" for _ in chunk)
                rows = conn.execute(
                    f"""
                    select asset_id, payload from metadata_records
                    where asset_id in ({placeholders})
                    order by asset_id, key, id
                    """,
                    tuple(chunk),
                ).fetchall()
                for row in rows:
                    records.setdefault(row["asset_id"], []).append(
                        MetadataRecord.from_dict(_loads(row["payload"]))
                    )
        finally:
            conn.close()
        return records
def _json(value: dict[str, Any]) -> str:
return json.dumps(value, sort_keys=True, separators=(",", ":"))
def _is_foreign_key_error(exc: sqlite3.IntegrityError) -> bool:
return "FOREIGN KEY" in str(exc).upper()
def _loads(value: str) -> dict[str, Any]:
return json.loads(value)
def _metadata_matches(
records: list[MetadataRecord],
metadata_filters: dict[str, Any],
*,
confirmed_metadata_only: bool,
) -> bool:
for key, expected in metadata_filters.items():
candidates = [record for record in records if record.key == key]
if confirmed_metadata_only:
candidates = [record for record in candidates if record.confirmed]
if not any(_metadata_value_matches(record.value, expected) for record in candidates):
return False
return True
def _metadata_value_matches(value: Any, expected: Any) -> bool:
if isinstance(value, list) and not isinstance(expected, list):
return expected in value
return value == expected