""" Integration test for full traceability workflow. Tests the complete flow: create artifacts + dependencies → trace provenance → verify lineage with a real SQLite database. """ import pytest import tempfile from pathlib import Path from markitect.prompts.dependencies.models import DependencyEdge, EdgeType from markitect.prompts.dependencies.repository import SQLiteDependencyRepository from markitect.prompts.execution.models import PromptRun, RunConfig from markitect.prompts.models import Artifact, ArtifactType from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository from markitect.prompts.traceability.service import TraceabilityService from markitect.prompts.visualization.graph import GraphExporter from markitect.prompts.queries.operations import PromptQueryService @pytest.fixture def temp_db(): """Create temporary database for testing.""" with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: db_path = f.name yield db_path Path(db_path).unlink(missing_ok=True) @pytest.fixture def artifact_repo(temp_db): return SQLiteArtifactRepository(temp_db) @pytest.fixture def dep_repo(temp_db): return SQLiteDependencyRepository(temp_db) @pytest.fixture def trace_service(artifact_repo, dep_repo, temp_db): return TraceabilityService(artifact_repo, dep_repo, db_path=temp_db) @pytest.fixture def query_service(artifact_repo, dep_repo, temp_db): return PromptQueryService(artifact_repo, dep_repo, db_path=temp_db) def _create_artifact(repo, space_id, name, content="content", atype=ArtifactType.CONTENT): artifact = Artifact.create( space_id=space_id, name=name, content=content, artifact_type=atype, ) return repo.create(artifact) def _create_edge(repo, src, tgt, run_id, edge_type=EdgeType.REQUIRES): edge = DependencyEdge.create( source_artifact_id=src, target_artifact_id=tgt, run_id=run_id, edge_type=edge_type, ) return repo.create(edge) class TestFullTraceabilityWorkflow: """End-to-end traceability workflow test.""" def test_create_trace_verify( self, artifact_repo, dep_repo, trace_service ): """ Full workflow: 1. Create template, input, output artifacts 2. Create dependency edges 3. Register a run 4. Trace the output artifact 5. Verify lineage data """ # Step 1: Create artifacts template = _create_artifact( artifact_repo, "space-1", "my-template", content="Generate {{input}}", atype=ArtifactType.TEMPLATE, ) input_art = _create_artifact( artifact_repo, "space-1", "input-data", content="raw input data", ) output_art = _create_artifact( artifact_repo, "space-1", "output-doc", content="generated output", atype=ArtifactType.GENERATED, ) # Step 2: Create a run run = PromptRun.create( template_id=template.id, input_bundle_hash="test-hash-123", ) run.mark_complete() run.metadata["manifest"] = { "resolved_inputs": [{"artifact_id": input_art.id}], "output_artifacts": [{"artifact_id": output_art.id}], } # Step 3: Create dependency edges _create_edge(dep_repo, input_art.id, template.id, run.id, EdgeType.REQUIRES) _create_edge(dep_repo, run.id, output_art.id, run.id, EdgeType.GENERATES) # Step 4: Register and trace trace_service.register_run(run) trace = trace_service.trace_artifact(output_art.id) # Step 5: Verify assert trace.artifact_id == output_art.id assert trace.producing_run is not None assert trace.producing_run.run_id == run.id assert trace.producing_run.template_id == template.id # Verify serialization d = trace.to_dict() assert d["artifact_id"] == output_art.id assert d["producing_run"]["run_id"] == run.id def test_multi_level_dependency_chain( self, artifact_repo, dep_repo, trace_service ): """Test tracing across a multi-level dependency chain.""" # A -> B -> C (A depends on B, B depends on C) a = _create_artifact(artifact_repo, "s", "a", "content-a") b = _create_artifact(artifact_repo, "s", "b", "content-b") c = _create_artifact(artifact_repo, "s", "c", "content-c") _create_edge(dep_repo, a.id, b.id, "r1") _create_edge(dep_repo, b.id, c.id, "r1") trace = trace_service.trace_artifact(a.id) assert b.id in trace.dependency_chain assert c.id in trace.dependency_chain def test_generator_run_tracing(self, trace_service): """Test tracing nested generator runs.""" parent = PromptRun.create( template_id="tmpl-1", input_bundle_hash="hash-1" ) parent.mark_complete() child = PromptRun.create( template_id="tmpl-2", input_bundle_hash="hash-2", parent_run_id=parent.id, depth=1, ) child.mark_complete() trace_service.register_run(parent) trace_service.register_run(child) # Trace should find generator runs from manifest context trace = trace_service.trace_artifact("dummy-artifact-id") # Generator runs are found via parent_run_id match generators = trace_service.get_generator_runs(parent.id) assert len(generators) == 1 assert generators[0].run_id == child.id assert generators[0].depth == 1 class TestVisualizationIntegration: """Test visualization with real dependency data.""" def test_graph_export_from_real_data(self, artifact_repo, dep_repo): """Test DOT and Mermaid export from real DB data.""" from markitect.prompts.dependencies.graph import GraphBuilder a = _create_artifact(artifact_repo, "s", "a") b = _create_artifact(artifact_repo, "s", "b") c = _create_artifact(artifact_repo, "s", "c") _create_edge(dep_repo, a.id, b.id, "r1", EdgeType.REQUIRES) _create_edge(dep_repo, b.id, c.id, "r1", EdgeType.GENERATES) builder = GraphBuilder(dep_repo) graph = builder.build_graph() dot = GraphExporter.to_dot(graph, "Test Graph") assert "digraph" in dot assert "requires" in dot assert "generates" in dot mermaid = GraphExporter.to_mermaid(graph, "Test Graph") assert "graph LR" in mermaid assert "requires" in mermaid assert "generates" in mermaid class TestQueryServiceIntegration: """Test PromptQueryService with real data.""" def test_dependency_stats_with_data( self, artifact_repo, dep_repo, query_service ): """Test stats with actual artifacts and edges.""" a = _create_artifact(artifact_repo, "s", "root") b = _create_artifact(artifact_repo, "s", "mid") c = _create_artifact(artifact_repo, "s", "leaf") _create_edge(dep_repo, a.id, b.id, "r1") _create_edge(dep_repo, b.id, c.id, "r1") stats = query_service.get_dependency_stats() assert stats["total_nodes"] == 3 assert stats["total_edges"] == 2 assert stats["root_count"] == 1 assert stats["leaf_count"] == 1 assert stats["has_cycles"] is False def test_find_artifacts_by_digest_integration( self, artifact_repo, query_service ): """Test finding artifacts by digest across spaces.""" a1 = _create_artifact(artifact_repo, "space-1", "doc-a", "shared content") a2 = _create_artifact(artifact_repo, "space-2", "doc-b", "shared content") results = query_service.find_artifacts_by_digest(a1.content_digest) assert len(results) == 2 ids = {r["artifact_id"] for r in results} assert a1.id in ids assert a2.id in ids def test_run_history_with_filters(self, query_service): """Test run history with template and status filters.""" from markitect.prompts.execution.models import RunStatus run_ok = PromptRun.create(template_id="t1", input_bundle_hash="h1") run_ok.mark_complete() run_fail = PromptRun.create(template_id="t2", input_bundle_hash="h2") run_fail.mark_failed("error") query_service.register_run(run_ok) query_service.register_run(run_fail) # All runs all_runs = query_service.get_run_history() assert len(all_runs) == 2 # Filter by template t1_runs = query_service.get_run_history(template_id="t1") assert len(t1_runs) == 1 assert t1_runs[0]["template_id"] == "t1" # Filter by status failed = query_service.get_run_history(status="failed") assert len(failed) == 1 assert failed[0]["status"] == "failed"