"""
Traceability service for artifact provenance tracking.

Implements FR-11.1: Trace any artifact to its producing PromptTemplate,
input artifacts, generator runs, and quality validation results.

Implements FR-11.2: Enable recomputation based on dependency changes.

Composition layer over existing services — does NOT duplicate data storage.
"""

from typing import Any, Dict, List, Optional

from markitect.prompts.dependencies.graph import GraphBuilder
from markitect.prompts.dependencies.queries import DependencyQueryService
from markitect.prompts.dependencies.repository import IDependencyRepository
from markitect.prompts.execution.models import PromptRun
from markitect.prompts.incremental.engine import IncrementalExecutionEngine
from markitect.prompts.quality.validator import QualityValidator
from markitect.prompts.repositories.interfaces import IArtifactRepository
from markitect.prompts.traceability.models import (
    ArtifactLineage,
    ProvenanceTrace,
    RunSummary,
)


def _run_to_summary(run: PromptRun) -> RunSummary:
    """Convert a PromptRun to a RunSummary."""
    return RunSummary.create(
        run_id=run.id,
        template_id=run.template_id,
        status=run.status.value,
        stage=run.stage.value,
        input_bundle_hash=run.input_bundle_hash,
        started_at=run.started_at,
        parent_run_id=run.parent_run_id,
        depth=run.depth,
        completed_at=run.completed_at,
    )


def _artifact_to_lineage(artifact: Any, role: str) -> ArtifactLineage:
    """Build an ArtifactLineage for a repository artifact in the given role."""
    return ArtifactLineage(
        artifact_id=artifact.id,
        name=artifact.name,
        space_id=artifact.space_id,
        artifact_type=artifact.artifact_type.value,
        content_digest=artifact.content_digest,
        role=role,
    )


def _run_manifest(run: PromptRun) -> Dict[str, Any]:
    """Return the "manifest" entry of a run's metadata, or {} when absent/None."""
    metadata = run.metadata or {}
    return metadata.get("manifest") or {}


class TraceabilityService:
    """
    Composition layer for full artifact provenance tracing.

    Delegates to DependencyQueryService, QualityValidator,
    IncrementalExecutionEngine, and direct repository access.
    """

    def __init__(
        self,
        artifact_repo: IArtifactRepository,
        dependency_repo: IDependencyRepository,
        db_path: Optional[str] = None,
    ):
        """
        Compose over existing repos and services.

        Args:
            artifact_repo: Repository for artifact lookups
            dependency_repo: Repository for dependency edge lookups
            db_path: Optional database path for quality/debt services.
                When omitted (or empty), no validator or incremental engine
                is created and the corresponding lookups return empty lists.
        """
        self._artifact_repo = artifact_repo
        self._dependency_repo = dependency_repo
        self._db_path = db_path

        self._query_service = DependencyQueryService(dependency_repo)
        self._graph_builder = GraphBuilder(dependency_repo)
        self._validator = QualityValidator(db_path=db_path) if db_path else None
        self._engine = (
            IncrementalExecutionEngine(db_path, self._query_service)
            if db_path
            else None
        )

        # Run registry: external code can register runs for tracing
        self._runs: Dict[str, PromptRun] = {}

    def register_run(self, run: PromptRun) -> None:
        """
        Register a run for traceability lookups.

        A run registered under an already-known id replaces the earlier one.

        Args:
            run: PromptRun to register
        """
        self._runs[run.id] = run

    def trace_artifact(self, artifact_id: str) -> ProvenanceTrace:
        """
        Full provenance trace for an artifact (FR-11.1).

        Template, input/output artifacts and generator runs are only filled
        in when a producing run is found; validation results, impact debt
        and the dependency chain are always looked up.

        Args:
            artifact_id: Artifact to trace

        Returns:
            ProvenanceTrace with all provenance data
        """
        trace = ProvenanceTrace(artifact_id=artifact_id)

        # Find producing run
        producing_run = self.get_producing_run(artifact_id)
        trace.producing_run = producing_run

        if producing_run:
            # Get template artifact
            template_artifact = self._artifact_repo.get_by_id(
                producing_run.template_id
            )
            if template_artifact:
                trace.template = _artifact_to_lineage(template_artifact, "template")

            # Get input and output artifacts
            trace.input_artifacts = self.get_input_artifacts(producing_run.run_id)
            trace.output_artifacts = self.get_output_artifacts(producing_run.run_id)

            # Get generator sub-runs
            trace.generator_runs = self.get_generator_runs(producing_run.run_id)

        # Get validation history
        trace.validation_results = self.get_validation_history(artifact_id)

        # Get impact debt
        trace.impact_debt = self.get_impact_debt(artifact_id)

        # Build dependency chain (sorted for a deterministic order)
        deps = self._query_service.find_transitive_dependencies(artifact_id)
        trace.dependency_chain = sorted(deps)

        return trace

    def get_producing_run(self, artifact_id: str) -> Optional[RunSummary]:
        """
        Find the run that produced an artifact.

        Two strategies are tried in order:

        1. Dependency edges targeting the artifact: the first edge whose
           source id matches a registered run id wins.
        2. Registered runs whose manifest lists the artifact under
           "output_artifacts".

        Args:
            artifact_id: Artifact to find producer of

        Returns:
            RunSummary if found, None otherwise
        """
        # NOTE(review): every edge targeting the artifact is considered here;
        # the edge type is not restricted to "generates". Confirm whether a
        # filter is wanted before adding one.
        for edge in self._dependency_repo.get_by_target(artifact_id):
            run = self._runs.get(edge.source_artifact_id)
            if run:
                return _run_to_summary(run)

        # Fallback: search registered runs by manifest metadata
        for run in self._runs.values():
            for output in _run_manifest(run).get("output_artifacts") or []:
                if output.get("artifact_id") == artifact_id:
                    return _run_to_summary(run)

        return None

    def _collect_lineage(
        self,
        run_id: str,
        edge_type: str,
        use_edge_target: bool,
        manifest_key: str,
        role: str,
    ) -> List[ArtifactLineage]:
        """
        Gather lineage entries for a run from dependency edges and its manifest.

        Edges are consulted first, then the registered run's manifest list
        named by ``manifest_key``. Each artifact appears at most once, in
        first-seen order; ids the artifact repository cannot resolve are
        skipped.

        Args:
            run_id: Run identifier
            edge_type: Edge type value to keep (e.g. "requires")
            use_edge_target: Read the edge's target id when True, its
                source id when False
            manifest_key: Manifest list holding dicts with an "artifact_id"
            role: Role recorded on every returned ArtifactLineage

        Returns:
            List of ArtifactLineage, de-duplicated by artifact id
        """
        collected: List[ArtifactLineage] = []
        seen_ids = set()

        def add(candidate_id: Any) -> None:
            # Skip blanks and anything already emitted by an earlier source.
            if not candidate_id or candidate_id in seen_ids:
                return
            artifact = self._artifact_repo.get_by_id(candidate_id)
            if artifact:
                collected.append(_artifact_to_lineage(artifact, role))
                # Track both the requested id and the stored id so either
                # spelling suppresses a later duplicate.
                seen_ids.add(candidate_id)
                seen_ids.add(artifact.id)

        for edge in self._dependency_repo.get_by_run(run_id):
            if edge.edge_type.value == edge_type:
                add(
                    edge.target_artifact_id
                    if use_edge_target
                    else edge.source_artifact_id
                )

        # Also check the manifest of the registered run, if any
        run = self._runs.get(run_id)
        if run:
            for entry in _run_manifest(run).get(manifest_key) or []:
                add(entry.get("artifact_id", ""))

        return collected

    def get_input_artifacts(self, run_id: str) -> List[ArtifactLineage]:
        """
        Get all input artifacts for a run.

        Uses "requires" dependency edges recorded for the run (taking each
        edge's source artifact), then the manifest's "resolved_inputs".

        Args:
            run_id: Run identifier

        Returns:
            List of ArtifactLineage for inputs
        """
        return self._collect_lineage(
            run_id,
            edge_type="requires",
            use_edge_target=False,
            manifest_key="resolved_inputs",
            role="input",
        )

    def get_output_artifacts(self, run_id: str) -> List[ArtifactLineage]:
        """
        Get all output artifacts produced by a run.

        Uses "generates" dependency edges recorded for the run (taking each
        edge's target artifact), then the manifest's "output_artifacts".

        Args:
            run_id: Run identifier

        Returns:
            List of ArtifactLineage for outputs
        """
        return self._collect_lineage(
            run_id,
            edge_type="generates",
            use_edge_target=True,
            manifest_key="output_artifacts",
            role="output",
        )

    def get_generator_runs(self, run_id: str) -> List[RunSummary]:
        """
        Get nested generator runs spawned by a run.

        Only direct children among the registered runs are returned.

        Args:
            run_id: Parent run identifier

        Returns:
            List of RunSummary for child runs
        """
        return [
            _run_to_summary(run)
            for run in self._runs.values()
            if run.parent_run_id == run_id
        ]

    def get_validation_history(self, artifact_id: str) -> List[Dict[str, Any]]:
        """
        Get validation results for an artifact across all runs.

        Args:
            artifact_id: Artifact identifier

        Returns:
            List of validation result dictionaries; empty when the service
            was created without a validator (no db_path)
        """
        if self._validator:
            return self._validator.get_results_for_artifact(artifact_id)
        return []

    def get_impact_debt(self, artifact_id: str) -> List[Dict[str, Any]]:
        """
        Get suppressed recomputation records for an artifact.

        Args:
            artifact_id: Artifact identifier

        Returns:
            List of impact debt dictionaries; empty when the service was
            created without an incremental engine (no db_path)
        """
        if self._engine:
            debts = self._engine.get_debt_for_artifact(artifact_id)
            return [d.to_dict() for d in debts]
        return []