# markitect-main/tests/unit/prompts/test_traceability_service.py

"""
Unit tests for TraceabilityService.

Tests trace_artifact, get_producing_run, get_input_artifacts,
get_output_artifacts, get_generator_runs, get_validation_history,
and get_impact_debt, plus the RunSummary and ArtifactLineage models.
"""

import pytest
import tempfile
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock

from markitect.prompts.dependencies.models import DependencyEdge, EdgeType
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
from markitect.prompts.execution.models import (
    ExecutionStage,
    PromptRun,
    RunConfig,
    RunStatus,
)
from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.traceability.models import (
    ArtifactLineage,
    ProvenanceTrace,
    RunSummary,
)
from markitect.prompts.traceability.service import TraceabilityService


@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file and delete it afterwards.

    The file is created with ``delete=False`` so that it still exists once
    the handle is closed; removal happens in the teardown step below.
    """
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp_file:
        location = tmp_file.name

    yield location

    # Teardown: the file may already be gone, which is not an error.
    leftover = Path(location)
    leftover.unlink(missing_ok=True)


@pytest.fixture
def artifact_repo(temp_db):
    """Provide a SQLiteArtifactRepository built from the ``temp_db`` path."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository


@pytest.fixture
def dep_repo(temp_db):
    """Provide a SQLiteDependencyRepository built from the ``temp_db`` path."""
    repository = SQLiteDependencyRepository(temp_db)
    return repository


@pytest.fixture
def service(artifact_repo, dep_repo, temp_db):
    """Provide a TraceabilityService wired to both repositories.

    ``temp_db`` is additionally passed as the service's ``db_path``.
    """
    traceability = TraceabilityService(artifact_repo, dep_repo, db_path=temp_db)
    return traceability


def _make_artifact(repo, space_id, name, content="content", atype=ArtifactType.CONTENT):
    """Build an Artifact from the given fields and store it through ``repo``.

    Returns whatever ``repo.create`` returns for the new artifact.
    """
    new_artifact = Artifact.create(
        space_id=space_id,
        name=name,
        content=content,
        artifact_type=atype,
    )
    persisted = repo.create(new_artifact)
    return persisted


def _make_run(template_id, status=RunStatus.SUCCESS, parent_run_id=None, depth=0):
    """Build a PromptRun for ``template_id`` with a fixed input bundle hash.

    Only ``RunStatus.SUCCESS`` has an effect here: in that case the run is
    marked complete before being returned. For any other ``status`` the run
    is returned exactly as ``PromptRun.create`` produced it.
    """
    new_run = PromptRun.create(
        template_id=template_id,
        input_bundle_hash="hash-abc",
        parent_run_id=parent_run_id,
        depth=depth,
    )
    if status != RunStatus.SUCCESS:
        return new_run

    new_run.mark_complete()
    return new_run


def _make_edge(repo, source, target, run_id, edge_type=EdgeType.REQUIRES):
    """Build a DependencyEdge from ``source`` to ``target`` and store it.

    Returns whatever ``repo.create`` returns for the new edge.
    """
    new_edge = DependencyEdge.create(
        source_artifact_id=source,
        target_artifact_id=target,
        run_id=run_id,
        edge_type=edge_type,
    )
    persisted = repo.create(new_edge)
    return persisted


class TestTraceArtifact:
    """Exercise TraceabilityService.trace_artifact."""

    def test_trace_returns_provenance(self, service, artifact_repo, dep_repo):
        """The result is a ProvenanceTrace carrying the requested artifact id."""
        stored = _make_artifact(artifact_repo, "space-1", "output-doc")

        provenance = service.trace_artifact(stored.id)

        assert isinstance(provenance, ProvenanceTrace)
        assert provenance.artifact_id == stored.id

    def test_trace_with_producing_run(self, service, artifact_repo, dep_repo):
        """A run whose manifest lists the artifact as output is reported as producer."""
        template_art = _make_artifact(
            artifact_repo, "space-1", "tmpl", atype=ArtifactType.TEMPLATE
        )
        produced = _make_artifact(artifact_repo, "space-1", "output")

        producer = _make_run(template_art.id)
        manifest = {
            "output_artifacts": [{"artifact_id": produced.id}],
            "resolved_inputs": [],
        }
        producer.metadata["manifest"] = manifest
        service.register_run(producer)

        provenance = service.trace_artifact(produced.id)

        assert provenance.producing_run is not None
        assert provenance.producing_run.run_id == producer.id

    def test_trace_no_producing_run(self, service, artifact_repo):
        """An artifact no registered run refers to has ``producing_run`` of None."""
        lonely = _make_artifact(artifact_repo, "space-1", "standalone")

        provenance = service.trace_artifact(lonely.id)

        assert provenance.producing_run is None

    def test_trace_includes_dependency_chain(self, service, artifact_repo, dep_repo):
        """Edges a -> b -> c put both b and c into a's dependency chain."""
        first, second, third = [
            _make_artifact(artifact_repo, "s", label) for label in ("a", "b", "c")
        ]

        for upstream, downstream in ((first, second), (second, third)):
            _make_edge(dep_repo, upstream.id, downstream.id, "r1")

        chain = service.trace_artifact(first.id).dependency_chain

        for expected in (second, third):
            assert expected.id in chain

    def test_trace_to_dict(self, service, artifact_repo):
        """``to_dict`` exposes the artifact id and the three provenance keys."""
        stored = _make_artifact(artifact_repo, "s", "x")

        payload = service.trace_artifact(stored.id).to_dict()

        assert payload["artifact_id"] == stored.id
        for key in ("producing_run", "input_artifacts", "dependency_chain"):
            assert key in payload


class TestGetProducingRun:
    """Exercise TraceabilityService.get_producing_run."""

    def test_finds_run_via_generates_edge(self, service, artifact_repo, dep_repo):
        """A GENERATES edge from the run id to the artifact identifies the producer."""
        produced = _make_artifact(artifact_repo, "s", "output")
        producer = _make_run("tmpl-1")
        service.register_run(producer)

        # The edge's source is the run id itself, its target the produced artifact.
        _make_edge(dep_repo, producer.id, produced.id, producer.id, EdgeType.GENERATES)

        summary = service.get_producing_run(produced.id)

        assert summary is not None
        assert summary.run_id == producer.id

    def test_finds_run_via_manifest(self, service, artifact_repo):
        """A manifest listing the artifact under ``output_artifacts`` identifies the producer."""
        produced = _make_artifact(artifact_repo, "s", "output")
        producer = _make_run("tmpl-1")
        manifest = {
            "output_artifacts": [{"artifact_id": produced.id}],
        }
        producer.metadata["manifest"] = manifest
        service.register_run(producer)

        summary = service.get_producing_run(produced.id)

        assert summary is not None
        assert summary.run_id == producer.id

    def test_returns_none_when_not_found(self, service):
        """With no runs registered, the lookup yields None."""
        summary = service.get_producing_run("nonexistent")

        assert summary is None


class TestGetInputArtifacts:
    """Exercise TraceabilityService.get_input_artifacts."""

    def test_finds_inputs_via_edges(self, service, artifact_repo, dep_repo):
        """A REQUIRES edge from an artifact to the run id yields that artifact as input."""
        source_art = _make_artifact(artifact_repo, "s", "input-data")
        consumer = _make_run("tmpl-1")
        service.register_run(consumer)

        _make_edge(dep_repo, source_art.id, consumer.id, consumer.id, EdgeType.REQUIRES)

        found = service.get_input_artifacts(consumer.id)

        assert len(found) == 1
        sole = found[0]
        assert sole.artifact_id == source_art.id
        assert sole.role == "input"

    def test_finds_inputs_via_manifest(self, service, artifact_repo, dep_repo):
        """An artifact listed under the manifest's ``resolved_inputs`` is returned."""
        source_art = _make_artifact(artifact_repo, "s", "input-data")
        consumer = _make_run("tmpl-1")
        manifest = {
            "resolved_inputs": [{"artifact_id": source_art.id}],
            "output_artifacts": [],
        }
        consumer.metadata["manifest"] = manifest
        service.register_run(consumer)

        found = service.get_input_artifacts(consumer.id)

        assert len(found) == 1
        assert found[0].artifact_id == source_art.id

    def test_no_duplicates(self, service, artifact_repo, dep_repo):
        """An input named by both the manifest and an edge appears only once."""
        source_art = _make_artifact(artifact_repo, "s", "input-data")
        consumer = _make_run("tmpl-1")
        manifest = {
            "resolved_inputs": [{"artifact_id": source_art.id}],
            "output_artifacts": [],
        }
        consumer.metadata["manifest"] = manifest
        service.register_run(consumer)

        # Same artifact again, this time recorded as a dependency edge.
        _make_edge(dep_repo, source_art.id, consumer.id, consumer.id, EdgeType.REQUIRES)

        found = service.get_input_artifacts(consumer.id)

        assert len(found) == 1


class TestGetOutputArtifacts:
    """Exercise TraceabilityService.get_output_artifacts."""

    def test_finds_outputs_via_edges(self, service, artifact_repo, dep_repo):
        """A GENERATES edge from the run id to an artifact yields that artifact as output."""
        produced = _make_artifact(artifact_repo, "s", "output")
        producer = _make_run("tmpl-1")
        service.register_run(producer)

        _make_edge(dep_repo, producer.id, produced.id, producer.id, EdgeType.GENERATES)

        found = service.get_output_artifacts(producer.id)

        assert len(found) == 1
        sole = found[0]
        assert sole.artifact_id == produced.id
        assert sole.role == "output"


class TestGetGeneratorRuns:
    """Exercise TraceabilityService.get_generator_runs."""

    def test_finds_child_runs(self, service):
        """Runs whose ``parent_run_id`` is the queried run are returned; others are not."""
        root = _make_run("tmpl-1")
        first_child = _make_run("tmpl-2", parent_run_id=root.id, depth=1)
        second_child = _make_run("tmpl-3", parent_run_id=root.id, depth=1)
        bystander = _make_run("tmpl-4")

        for run in (root, first_child, second_child, bystander):
            service.register_run(run)

        reported_ids = {
            summary.run_id for summary in service.get_generator_runs(root.id)
        }

        assert first_child.id in reported_ids
        assert second_child.id in reported_ids
        assert bystander.id not in reported_ids

    def test_no_children(self, service):
        """A registered run without children produces an empty list."""
        root = _make_run("tmpl-1")
        service.register_run(root)

        children = service.get_generator_runs(root.id)

        assert children == []


class TestGetValidationHistory:
    """Exercise TraceabilityService.get_validation_history."""

    def test_returns_empty_without_db(self, artifact_repo, dep_repo):
        """A service constructed with ``db_path=None`` reports no history."""
        no_db_service = TraceabilityService(artifact_repo, dep_repo, db_path=None)

        history = no_db_service.get_validation_history("art-1")

        assert history == []

    def test_returns_results_with_db(self, service):
        """With a db_path set but no validation data written, the history is empty."""
        # No validation records are created in this test, so the lookup for
        # an arbitrary artifact id is expected to come back empty.
        history = service.get_validation_history("art-1")

        assert history == []


class TestGetImpactDebt:
    """Exercise TraceabilityService.get_impact_debt."""

    def test_returns_empty_without_db(self, artifact_repo, dep_repo):
        """A service constructed with ``db_path=None`` reports no debt."""
        no_db_service = TraceabilityService(artifact_repo, dep_repo, db_path=None)

        debt = no_db_service.get_impact_debt("art-1")

        assert debt == []

    def test_returns_empty_no_debt(self, service):
        """Querying an id for which nothing was recorded gives an empty list."""
        debt = service.get_impact_debt("nonexistent")

        assert debt == []


class TestRunSummary:
    """Tests for the RunSummary model (creation and ``to_dict`` serialization)."""

    # A fixed, naive timestamp keeps these tests deterministic and avoids
    # datetime.utcnow(), which is deprecated as of Python 3.12. It is naive
    # (no tzinfo) to match what utcnow() used to supply here.
    FIXED_TIME = datetime(2024, 1, 1, 12, 0, 0)

    def test_create_and_to_dict(self):
        """Test RunSummary creation and serialization with all timestamps set."""
        summary = RunSummary.create(
            run_id="r1",
            template_id="t1",
            status="success",
            stage="complete",
            input_bundle_hash="hash",
            started_at=self.FIXED_TIME,
            completed_at=self.FIXED_TIME,
        )
        d = summary.to_dict()
        assert d["run_id"] == "r1"
        assert d["status"] == "success"
        assert d["completed_at"] is not None

    def test_optional_fields(self):
        """Test RunSummary defaults when optional fields are omitted.

        ``parent_run_id`` and ``completed_at`` are expected to serialize as
        None and ``depth`` as 0 when not passed to ``RunSummary.create``.
        """
        summary = RunSummary.create(
            run_id="r1",
            template_id="t1",
            status="pending",
            stage="pending",
            input_bundle_hash="hash",
            started_at=self.FIXED_TIME,
        )
        d = summary.to_dict()
        assert d["parent_run_id"] is None
        assert d["completed_at"] is None
        assert d["depth"] == 0


class TestArtifactLineage:
    """Exercise the ArtifactLineage model."""

    def test_to_dict(self):
        """``to_dict`` carries the artifact id and role through unchanged."""
        fields = {
            "artifact_id": "a1",
            "name": "test",
            "space_id": "s1",
            "artifact_type": "content",
            "content_digest": "abc123",
            "role": "input",
        }

        payload = ArtifactLineage(**fields).to_dict()

        assert payload["artifact_id"] == "a1"
        assert payload["role"] == "input"