# markitect-main/markitect/prompts/traceability/service.py

"""
Traceability service for artifact provenance tracking.

Implements FR-11.1: Trace any artifact to its producing PromptTemplate,
input artifacts, generator runs, and quality validation results.

Implements FR-11.2: Enable recomputation based on dependency changes.

Composition layer over existing services — does NOT duplicate data storage.
"""

from typing import Any, Dict, List, Optional

from markitect.prompts.dependencies.graph import GraphBuilder
from markitect.prompts.dependencies.queries import DependencyQueryService
from markitect.prompts.dependencies.repository import IDependencyRepository
from markitect.prompts.execution.models import PromptRun
from markitect.prompts.incremental.engine import IncrementalExecutionEngine
from markitect.prompts.quality.validator import QualityValidator
from markitect.prompts.repositories.interfaces import IArtifactRepository
from markitect.prompts.traceability.models import (
    ArtifactLineage,
    ProvenanceTrace,
    RunSummary,
)


def _run_to_summary(run: PromptRun) -> RunSummary:
    """
    Build the RunSummary view of a PromptRun.

    Status and stage are passed as their ``.value``; every other field is
    copied from the run unchanged.

    Args:
        run: PromptRun to summarise

    Returns:
        RunSummary created via ``RunSummary.create``
    """
    make_summary = RunSummary.create
    summary_fields = {
        "run_id": run.id,
        "template_id": run.template_id,
        "status": run.status.value,
        "stage": run.stage.value,
        "input_bundle_hash": run.input_bundle_hash,
        "started_at": run.started_at,
        "parent_run_id": run.parent_run_id,
        "depth": run.depth,
        "completed_at": run.completed_at,
    }
    return make_summary(**summary_fields)


class TraceabilityService:
    """
    Composition layer for full artifact provenance tracing.

    Delegates to DependencyQueryService, QualityValidator,
    IncrementalExecutionEngine, and direct repository access.

    Runs are not looked up in a repository: they are held in an in-memory
    registry populated through ``register_run``, so only registered runs
    can be resolved by the run-related queries.
    """

    def __init__(
        self,
        artifact_repo: IArtifactRepository,
        dependency_repo: IDependencyRepository,
        db_path: Optional[str] = None,
    ):
        """
        Compose over existing repos and services.

        Args:
            artifact_repo: Repository for artifact lookups
            dependency_repo: Repository for dependency edge lookups
            db_path: Optional database path for quality/debt services.
                When omitted (or empty) no validator or incremental engine
                is created, and validation history / impact debt are
                reported as empty lists.
        """
        self._artifact_repo = artifact_repo
        self._dependency_repo = dependency_repo
        self._db_path = db_path
        self._query_service = DependencyQueryService(dependency_repo)
        self._graph_builder = GraphBuilder(dependency_repo)
        self._validator = QualityValidator(db_path=db_path) if db_path else None
        self._engine = (
            IncrementalExecutionEngine(db_path, self._query_service)
            if db_path
            else None
        )
        # Run registry: external code can register runs for tracing
        self._runs: Dict[str, PromptRun] = {}

    def register_run(self, run: PromptRun) -> None:
        """
        Register a run for traceability lookups.

        Registering a run whose id is already present replaces the
        previous entry.

        Args:
            run: PromptRun to register
        """
        self._runs[run.id] = run

    def trace_artifact(self, artifact_id: str) -> ProvenanceTrace:
        """
        Full provenance trace for an artifact (FR-11.1).

        Run-related fields (template, inputs, outputs, generator runs) are
        only filled in when a producing run can be resolved. Validation
        history, impact debt and the dependency chain are always queried.

        Args:
            artifact_id: Artifact to trace

        Returns:
            ProvenanceTrace with all provenance data
        """
        trace = ProvenanceTrace(artifact_id=artifact_id)

        producing_run = self.get_producing_run(artifact_id)
        trace.producing_run = producing_run

        if producing_run is not None:
            template_artifact = self._artifact_repo.get_by_id(
                producing_run.template_id
            )
            if template_artifact:
                trace.template = self._to_lineage(template_artifact, "template")

            run_id = producing_run.run_id
            trace.input_artifacts = self.get_input_artifacts(run_id)
            trace.output_artifacts = self.get_output_artifacts(run_id)
            trace.generator_runs = self.get_generator_runs(run_id)

        trace.validation_results = self.get_validation_history(artifact_id)
        trace.impact_debt = self.get_impact_debt(artifact_id)

        # Sorted so the chain is stable regardless of the order in which
        # the query service returns the transitive dependencies.
        deps = self._query_service.find_transitive_dependencies(artifact_id)
        trace.dependency_chain = sorted(deps)

        return trace

    def get_producing_run(self, artifact_id: str) -> Optional[RunSummary]:
        """
        Find the run that produced an artifact.

        Two strategies are tried in order:

        1. Dependency edges targeting the artifact: the first edge whose
           ``source_artifact_id`` matches a registered run id wins.
        2. Registered runs whose manifest (``metadata["manifest"]``) lists
           the artifact under ``output_artifacts``.

        Args:
            artifact_id: Artifact to find producer of

        Returns:
            RunSummary if found, None otherwise
        """
        # NOTE(review): edges are not filtered by edge type here; any edge
        # targeting the artifact whose source id is a registered run id is
        # accepted. Confirm whether this should be limited to "generates".
        for edge in self._dependency_repo.get_by_target(artifact_id):
            run = self._runs.get(edge.source_artifact_id)
            if run is not None:
                return _run_to_summary(run)

        # Fallback: search registered runs by manifest metadata
        for run in self._runs.values():
            outputs = self._manifest_of(run).get("output_artifacts") or []
            for output in outputs:
                if output.get("artifact_id") == artifact_id:
                    return _run_to_summary(run)

        return None

    def get_input_artifacts(self, run_id: str) -> List[ArtifactLineage]:
        """
        Get all input artifacts for a run.

        Candidates come from the run's "requires" dependency edges (their
        source artifact) followed by the ``resolved_inputs`` entries of the
        registered run's manifest. Each artifact is listed once, in
        first-seen order; ids the artifact repository cannot resolve are
        skipped.

        Args:
            run_id: Run identifier

        Returns:
            List of ArtifactLineage for inputs
        """
        edge_ids = [
            edge.source_artifact_id
            for edge in self._dependency_repo.get_by_run(run_id)
            if edge.edge_type.value == "requires"
        ]
        manifest_ids = self._manifest_artifact_ids(run_id, "resolved_inputs")
        return self._resolve_lineage(edge_ids + manifest_ids, "input")

    def get_output_artifacts(self, run_id: str) -> List[ArtifactLineage]:
        """
        Get all output artifacts produced by a run.

        Candidates come from the run's "generates" dependency edges (their
        target artifact) followed by the ``output_artifacts`` entries of
        the registered run's manifest. Each artifact is listed once, in
        first-seen order; ids the artifact repository cannot resolve are
        skipped.

        Args:
            run_id: Run identifier

        Returns:
            List of ArtifactLineage for outputs
        """
        edge_ids = [
            edge.target_artifact_id
            for edge in self._dependency_repo.get_by_run(run_id)
            if edge.edge_type.value == "generates"
        ]
        manifest_ids = self._manifest_artifact_ids(run_id, "output_artifacts")
        return self._resolve_lineage(edge_ids + manifest_ids, "output")

    def get_generator_runs(self, run_id: str) -> List[RunSummary]:
        """
        Get nested generator runs spawned by a run.

        Only direct children are returned: registered runs whose
        ``parent_run_id`` equals ``run_id``.

        Args:
            run_id: Parent run identifier

        Returns:
            List of RunSummary for child runs
        """
        return [
            _run_to_summary(run)
            for run in self._runs.values()
            if run.parent_run_id == run_id
        ]

    def get_validation_history(self, artifact_id: str) -> List[Dict[str, Any]]:
        """
        Get validation results for an artifact across all runs.

        Args:
            artifact_id: Artifact identifier

        Returns:
            List of validation result dictionaries; empty when the service
            was created without a database path
        """
        if self._validator is None:
            return []
        return self._validator.get_results_for_artifact(artifact_id)

    def get_impact_debt(self, artifact_id: str) -> List[Dict[str, Any]]:
        """
        Get suppressed recomputation records for an artifact.

        Args:
            artifact_id: Artifact identifier

        Returns:
            List of impact debt dictionaries; empty when the service was
            created without a database path
        """
        if self._engine is None:
            return []
        debts = self._engine.get_debt_for_artifact(artifact_id)
        return [debt.to_dict() for debt in debts]

    @staticmethod
    def _to_lineage(artifact: Any, role: str) -> ArtifactLineage:
        """Build the ArtifactLineage entry for a repository artifact in a role."""
        return ArtifactLineage(
            artifact_id=artifact.id,
            name=artifact.name,
            space_id=artifact.space_id,
            artifact_type=artifact.artifact_type.value,
            content_digest=artifact.content_digest,
            role=role,
        )

    @staticmethod
    def _manifest_of(run: PromptRun) -> Dict[str, Any]:
        """Return ``run.metadata["manifest"]``, or an empty dict when absent or None."""
        metadata = run.metadata or {}
        return metadata.get("manifest") or {}

    def _manifest_artifact_ids(self, run_id: str, manifest_key: str) -> List[Any]:
        """List the ``artifact_id`` values under a manifest key of a registered run."""
        run = self._runs.get(run_id)
        if run is None:
            return []
        entries = self._manifest_of(run).get(manifest_key) or []
        return [entry.get("artifact_id", "") for entry in entries]

    def _resolve_lineage(
        self, candidate_ids: List[Any], role: str
    ) -> List[ArtifactLineage]:
        """Resolve ids to ArtifactLineage entries, skipping blanks, repeats and misses."""
        lineage: List[ArtifactLineage] = []
        seen_ids = set()
        for candidate_id in candidate_ids:
            if not candidate_id or candidate_id in seen_ids:
                continue
            artifact = self._artifact_repo.get_by_id(candidate_id)
            if not artifact:
                # Unresolved ids are not remembered, so a later occurrence
                # of the same id is looked up again.
                continue
            lineage.append(self._to_lineage(artifact, role))
            # Track both the requested id and the id the artifact reports,
            # so either form is recognised as already listed.
            seen_ids.update((candidate_id, artifact.id))
        return lineage