Files
markitect-main/tests/integration/prompts/test_traceability_workflow.py
tegwick 7b4bd461c9 feat(prompts): implement Phase 8 - Observability & Traceability (FR-11)
Complete implementation of Phase 8, the final phase of prompt dependency
resolution infrastructure, adding full observability and traceability.

## Features (FR-11)

### FR-11.1: Complete Artifact Provenance Tracing
- TraceabilityService: composition layer for full artifact lineage
- Trace any artifact to its producing PromptTemplate, input artifacts,
  generator runs, and quality validation results
- ProvenanceTrace model with complete dependency chain reconstruction
- RunSummary and ArtifactLineage models for structured trace output

### FR-11.2: Recomputation Query Infrastructure
- PromptQueryService: cross-service complex queries
- Run history queries with template and status filters
- Stale artifact detection via impact debt analysis
- Dependency graph statistics (nodes, edges, cycles, roots, leaves)
- Content-based artifact lookups by digest

### Visualization Support
- GraphExporter: DOT (Graphviz) and Mermaid format export
- Supports all edge types (requires, generates, includes)
- Handles isolated nodes, linear chains, diamonds, and complex graphs

### CLI Commands (prompt group)
- `prompt trace <artifact_id>` - Full provenance trace as JSON
- `prompt graph <artifact_id>` - Dependency graph (DOT/Mermaid)
- `prompt runs` - List execution runs with filters
- `prompt debt` - Show impact debt and stale artifacts
- `prompt stats` - Dependency graph statistics

## Implementation

Source files (8):
- markitect/prompts/traceability/models.py - Trace data models
- markitect/prompts/traceability/service.py - TraceabilityService
- markitect/prompts/visualization/graph.py - Graph export
- markitect/prompts/queries/operations.py - PromptQueryService
- markitect/prompts/cli.py - Click CLI commands
- Package __init__.py files (3)

Tests (64 total, all passing):
- tests/unit/prompts/test_traceability_service.py (21 tests)
- tests/unit/prompts/test_visualization.py (14 tests)
- tests/unit/prompts/test_query_operations.py (12 tests)
- tests/integration/prompts/test_traceability_workflow.py (7 tests)
- tests/integration/prompts/test_prompt_cli.py (10 tests)

## Architecture

TraceabilityService is a composition layer that delegates to:
- DependencyQueryService (transitive dependency lookups)
- QualityValidator (validation history)
- IncrementalExecutionEngine (impact debt queries)
- Direct repository access (artifacts, edges)

No duplicate data storage - all data comes from existing Phase 1-7
infrastructure (artifact repo, dependency repo, validation DB, debt DB).

## Verification

All 2250 tests pass with 0 regressions.
Phase 8 completes the full 8-phase implementation roadmap.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-09 20:32:18 +01:00

256 lines
9.0 KiB
Python

"""
Integration test for full traceability workflow.
Tests the complete flow: create artifacts + dependencies → trace provenance
→ verify lineage with a real SQLite database.
"""
import pytest
import tempfile
from pathlib import Path
from markitect.prompts.dependencies.models import DependencyEdge, EdgeType
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
from markitect.prompts.execution.models import PromptRun, RunConfig
from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.traceability.service import TraceabilityService
from markitect.prompts.visualization.graph import GraphExporter
from markitect.prompts.queries.operations import PromptQueryService
@pytest.fixture
def temp_db():
    """Provide the path of an empty temporary ``.db`` file.

    The file is created (and closed) before the test runs, its path is
    yielded as a string, and the file is removed after the test finishes.
    """
    handle = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with handle:
        database_file = handle.name
    yield database_file
    # delete=False above means removal is our responsibility.
    Path(database_file).unlink(missing_ok=True)
@pytest.fixture
def artifact_repo(temp_db):
    """Build a ``SQLiteArtifactRepository`` on the temporary database path."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository
@pytest.fixture
def dep_repo(temp_db):
    """Build a ``SQLiteDependencyRepository`` on the temporary database path."""
    repository = SQLiteDependencyRepository(temp_db)
    return repository
@pytest.fixture
def trace_service(artifact_repo, dep_repo, temp_db):
    """Build a ``TraceabilityService`` from both repositories and the DB path."""
    service = TraceabilityService(artifact_repo, dep_repo, db_path=temp_db)
    return service
@pytest.fixture
def query_service(artifact_repo, dep_repo, temp_db):
    """Build a ``PromptQueryService`` from both repositories and the DB path."""
    service = PromptQueryService(artifact_repo, dep_repo, db_path=temp_db)
    return service
def _create_artifact(repo, space_id, name, content="content", atype=ArtifactType.CONTENT):
    """Build an ``Artifact`` from the given fields and store it through ``repo``.

    Returns whatever ``repo.create`` returns for the new artifact.
    """
    fields = {
        "space_id": space_id,
        "name": name,
        "content": content,
        "artifact_type": atype,
    }
    new_artifact = Artifact.create(**fields)
    return repo.create(new_artifact)
def _create_edge(repo, src, tgt, run_id, edge_type=EdgeType.REQUIRES):
    """Build a ``DependencyEdge`` from ``src`` to ``tgt`` and store it through ``repo``.

    Returns whatever ``repo.create`` returns for the new edge.
    """
    fields = {
        "source_artifact_id": src,
        "target_artifact_id": tgt,
        "run_id": run_id,
        "edge_type": edge_type,
    }
    new_edge = DependencyEdge.create(**fields)
    return repo.create(new_edge)
class TestFullTraceabilityWorkflow:
    """End-to-end traceability workflow tests against a real SQLite file."""

    def test_create_trace_verify(
        self, artifact_repo, dep_repo, trace_service
    ):
        """
        Full workflow:
        1. Create template, input, output artifacts
        2. Create a completed run carrying a manifest
        3. Create dependency edges
        4. Register the run and trace the output artifact
        5. Verify the producing run and the serialized trace
        """
        # Step 1: Create artifacts
        template = _create_artifact(
            artifact_repo, "space-1", "my-template",
            content="Generate {{input}}", atype=ArtifactType.TEMPLATE,
        )
        input_art = _create_artifact(
            artifact_repo, "space-1", "input-data",
            content="raw input data",
        )
        output_art = _create_artifact(
            artifact_repo, "space-1", "output-doc",
            content="generated output", atype=ArtifactType.GENERATED,
        )

        # Step 2: Create a run
        run = PromptRun.create(
            template_id=template.id,
            input_bundle_hash="test-hash-123",
        )
        run.mark_complete()
        # The manifest links the run to its input and output artifacts.
        run.metadata["manifest"] = {
            "resolved_inputs": [{"artifact_id": input_art.id}],
            "output_artifacts": [{"artifact_id": output_art.id}],
        }

        # Step 3: Create dependency edges
        _create_edge(dep_repo, input_art.id, template.id, run.id, EdgeType.REQUIRES)
        _create_edge(dep_repo, run.id, output_art.id, run.id, EdgeType.GENERATES)

        # Step 4: Register and trace
        trace_service.register_run(run)
        trace = trace_service.trace_artifact(output_art.id)

        # Step 5: Verify
        assert trace.artifact_id == output_art.id
        assert trace.producing_run is not None
        assert trace.producing_run.run_id == run.id
        assert trace.producing_run.template_id == template.id

        # Verify serialization
        d = trace.to_dict()
        assert d["artifact_id"] == output_art.id
        assert d["producing_run"]["run_id"] == run.id

    def test_multi_level_dependency_chain(
        self, artifact_repo, dep_repo, trace_service
    ):
        """Test tracing across a multi-level dependency chain."""
        # A -> B -> C (A depends on B, B depends on C)
        a = _create_artifact(artifact_repo, "s", "a", "content-a")
        b = _create_artifact(artifact_repo, "s", "b", "content-b")
        c = _create_artifact(artifact_repo, "s", "c", "content-c")
        _create_edge(dep_repo, a.id, b.id, "r1")
        _create_edge(dep_repo, b.id, c.id, "r1")

        trace = trace_service.trace_artifact(a.id)

        # Both the direct (B) and the transitive (C) dependency must appear.
        assert b.id in trace.dependency_chain
        assert c.id in trace.dependency_chain

    def test_generator_run_tracing(self, trace_service):
        """Test tracing nested generator runs."""
        parent = PromptRun.create(
            template_id="tmpl-1", input_bundle_hash="hash-1"
        )
        parent.mark_complete()
        child = PromptRun.create(
            template_id="tmpl-2",
            input_bundle_hash="hash-2",
            parent_run_id=parent.id,
            depth=1,
        )
        child.mark_complete()
        trace_service.register_run(parent)
        trace_service.register_run(child)

        # The returned trace is not inspected; the call is kept so the test
        # still fails if tracing an unregistered artifact id raises.
        trace_service.trace_artifact("dummy-artifact-id")

        # Generator runs are looked up by the parent run's id.
        generators = trace_service.get_generator_runs(parent.id)
        assert len(generators) == 1
        assert generators[0].run_id == child.id
        assert generators[0].depth == 1
class TestVisualizationIntegration:
    """Graph export checks driven by edges stored in a real database."""

    def test_graph_export_from_real_data(self, artifact_repo, dep_repo):
        """Export a stored three-node chain as DOT and as Mermaid text."""
        from markitect.prompts.dependencies.graph import GraphBuilder

        first = _create_artifact(artifact_repo, "s", "a")
        second = _create_artifact(artifact_repo, "s", "b")
        third = _create_artifact(artifact_repo, "s", "c")
        # One edge of each type so both labels must show up in the output.
        _create_edge(dep_repo, first.id, second.id, "r1", EdgeType.REQUIRES)
        _create_edge(dep_repo, second.id, third.id, "r1", EdgeType.GENERATES)

        dependency_graph = GraphBuilder(dep_repo).build_graph()

        dot_text = GraphExporter.to_dot(dependency_graph, "Test Graph")
        for fragment in ("digraph", "requires", "generates"):
            assert fragment in dot_text

        mermaid_text = GraphExporter.to_mermaid(dependency_graph, "Test Graph")
        for fragment in ("graph LR", "requires", "generates"):
            assert fragment in mermaid_text
class TestQueryServiceIntegration:
    """Test PromptQueryService with real data."""

    def test_dependency_stats_with_data(
        self, artifact_repo, dep_repo, query_service
    ):
        """Test stats with actual artifacts and edges."""
        # Linear chain: root -> mid -> leaf.
        a = _create_artifact(artifact_repo, "s", "root")
        b = _create_artifact(artifact_repo, "s", "mid")
        c = _create_artifact(artifact_repo, "s", "leaf")
        _create_edge(dep_repo, a.id, b.id, "r1")
        _create_edge(dep_repo, b.id, c.id, "r1")

        stats = query_service.get_dependency_stats()

        assert stats["total_nodes"] == 3
        assert stats["total_edges"] == 2
        assert stats["root_count"] == 1
        assert stats["leaf_count"] == 1
        assert stats["has_cycles"] is False

    def test_find_artifacts_by_digest_integration(
        self, artifact_repo, query_service
    ):
        """Test finding artifacts by digest across spaces."""
        # Identical content stored in two different spaces.
        a1 = _create_artifact(artifact_repo, "space-1", "doc-a", "shared content")
        a2 = _create_artifact(artifact_repo, "space-2", "doc-b", "shared content")

        results = query_service.find_artifacts_by_digest(a1.content_digest)

        assert len(results) == 2
        ids = {r["artifact_id"] for r in results}
        assert a1.id in ids
        assert a2.id in ids

    def test_run_history_with_filters(self, query_service):
        """Test run history with template and status filters."""
        # One completed run and one failed run, on different templates.
        run_ok = PromptRun.create(template_id="t1", input_bundle_hash="h1")
        run_ok.mark_complete()
        run_fail = PromptRun.create(template_id="t2", input_bundle_hash="h2")
        run_fail.mark_failed("error")
        query_service.register_run(run_ok)
        query_service.register_run(run_fail)

        # All runs
        all_runs = query_service.get_run_history()
        assert len(all_runs) == 2

        # Filter by template
        t1_runs = query_service.get_run_history(template_id="t1")
        assert len(t1_runs) == 1
        assert t1_runs[0]["template_id"] == "t1"

        # Filter by status
        failed = query_service.get_run_history(status="failed")
        assert len(failed) == 1
        assert failed[0]["status"] == "failed"