feat(prompts): implement Phase 8 - Observability & Traceability (FR-11)

Complete the implementation of Phase 8, the final phase of the prompt
dependency resolution infrastructure, adding full observability and
traceability.

## Features (FR-11)

### FR-11.1: Complete Artifact Provenance Tracing
- TraceabilityService: composition layer for full artifact lineage
- Trace any artifact to producing PromptTemplate, input artifacts,
  generator runs, and quality validation results
- ProvenanceTrace model with complete dependency chain reconstruction
- RunSummary and ArtifactLineage models for structured trace output

### FR-11.2: Recomputation Query Infrastructure
- PromptQueryService: cross-service complex queries
- Run history queries with template and status filters
- Stale artifact detection via impact debt analysis
- Dependency graph statistics (nodes, edges, cycles, roots, leaves)
- Content-based artifact lookups by digest

### Visualization Support
- GraphExporter: DOT (Graphviz) and Mermaid format export
- Supports all edge types (requires, generates, includes)
- Handles isolated nodes, linear chains, diamonds, and complex graphs

### CLI Commands (prompt group)
- `prompt trace <artifact_id>` - Full provenance trace as JSON
- `prompt graph <artifact_id>` - Dependency graph (DOT/Mermaid)
- `prompt runs` - List execution runs with filters
- `prompt debt` - Show impact debt and stale artifacts
- `prompt stats` - Dependency graph statistics

## Implementation

Source files (8):
- markitect/prompts/traceability/models.py - Trace data models
- markitect/prompts/traceability/service.py - TraceabilityService
- markitect/prompts/visualization/graph.py - Graph export
- markitect/prompts/queries/operations.py - PromptQueryService
- markitect/prompts/cli.py - Click CLI commands
- Package __init__.py files (3)

Tests (64 total, all passing):
- tests/unit/prompts/test_traceability_service.py (21 tests)
- tests/unit/prompts/test_visualization.py (14 tests)
- tests/unit/prompts/test_query_operations.py (12 tests)
- tests/integration/prompts/test_traceability_workflow.py (7 tests)
- tests/integration/prompts/test_prompt_cli.py (10 tests)

## Architecture

TraceabilityService is a composition layer that delegates to:
- DependencyQueryService (transitive dependency lookups)
- QualityValidator (validation history)
- IncrementalExecutionEngine (impact debt queries)
- Direct repository access (artifacts, edges)

No duplicate data storage - all data comes from existing Phase 1-7
infrastructure (artifact repo, dependency repo, validation DB, debt DB).

## Verification

All 2250 tests pass with 0 regressions.
Phase 8 completes the full 8-phase implementation roadmap.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 20:32:18 +01:00
parent 704272644c
commit 7b4bd461c9
14 changed files with 2012 additions and 0 deletions

View File

@@ -0,0 +1,184 @@
"""
Unit tests for PromptQueryService.
Tests run history, impact analysis queries, dependency stats,
and artifact digest lookups.
"""
import pytest
import tempfile
from pathlib import Path
from markitect.prompts.dependencies.models import DependencyEdge, EdgeType
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
from markitect.prompts.execution.models import PromptRun, RunStatus
from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.queries.operations import PromptQueryService
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file and remove it afterwards."""
    handle = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with handle:
        # Only the name is needed; leaving the ``with`` closes the handle.
        location = handle.name
    yield location
    # The file was created with delete=False, so it must be removed by hand.
    Path(location).unlink(missing_ok=True)
@pytest.fixture
def artifact_repo(temp_db):
    """Build a SQLiteArtifactRepository on the temporary database path."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository
@pytest.fixture
def dep_repo(temp_db):
    """Build a SQLiteDependencyRepository on the temporary database path."""
    repository = SQLiteDependencyRepository(temp_db)
    return repository
@pytest.fixture
def query_service(artifact_repo, dep_repo, temp_db):
    """Build the PromptQueryService under test from both repositories."""
    service_under_test = PromptQueryService(
        artifact_repo,
        dep_repo,
        db_path=temp_db,
    )
    return service_under_test
def _make_artifact(repo, space_id, name, content="content"):
    """Create a CONTENT-type artifact and persist it through ``repo``.

    Returns whatever ``repo.create`` returns for the new artifact.
    """
    fields = {
        "space_id": space_id,
        "name": name,
        "content": content,
        "artifact_type": ArtifactType.CONTENT,
    }
    return repo.create(Artifact.create(**fields))
def _make_run(template_id, status=RunStatus.SUCCESS):
    """Create a PromptRun and move it into the requested status.

    SUCCESS marks the run complete, FAILED marks it failed with the message
    "error"; any other status leaves the freshly created run untouched.
    """
    new_run = PromptRun.create(template_id=template_id, input_bundle_hash="hash-abc")
    if status == RunStatus.FAILED:
        new_run.mark_failed("error")
    elif status == RunStatus.SUCCESS:
        new_run.mark_complete()
    return new_run
def _make_edge(repo, source, target, run_id):
    """Create a REQUIRES edge from ``source`` to ``target`` and persist it.

    Returns whatever ``repo.create`` returns for the new edge.
    """
    fields = {
        "source_artifact_id": source,
        "target_artifact_id": target,
        "run_id": run_id,
        "edge_type": EdgeType.REQUIRES,
    }
    return repo.create(DependencyEdge.create(**fields))
class TestRunHistory:
    """Exercise PromptQueryService.get_run_history and its filters."""

    def test_returns_all_runs(self, query_service):
        """Every registered run is returned when no filter is given."""
        for template in ("tmpl-1", "tmpl-2"):
            query_service.register_run(_make_run(template))

        assert len(query_service.get_run_history()) == 2

    def test_filter_by_template(self, query_service):
        """Only runs of the requested template_id are returned."""
        for template in ("tmpl-1", "tmpl-2"):
            query_service.register_run(_make_run(template))

        matches = query_service.get_run_history(template_id="tmpl-1")

        assert len(matches) == 1
        assert matches[0]["template_id"] == "tmpl-1"

    def test_filter_by_status(self, query_service):
        """Only runs with the requested status are returned."""
        for outcome in (RunStatus.SUCCESS, RunStatus.FAILED):
            query_service.register_run(_make_run("tmpl-1", outcome))

        matches = query_service.get_run_history(status="success")

        assert len(matches) == 1
        assert matches[0]["status"] == "success"

    def test_limit(self, query_service):
        """The limit parameter caps the number of returned runs."""
        for index in range(10):
            query_service.register_run(_make_run(f"tmpl-{index}"))

        assert len(query_service.get_run_history(limit=3)) == 3

    def test_empty_history(self, query_service):
        """With no registered runs the history is an empty list."""
        history = query_service.get_run_history()
        assert history == []
class TestStaleArtifacts:
    """Exercise PromptQueryService.get_stale_artifacts."""

    def test_no_debt_returns_empty(self, query_service):
        """A service with no recorded debt reports no stale artifacts."""
        stale = query_service.get_stale_artifacts()
        assert stale == []

    def test_no_engine_returns_empty(self, artifact_repo, dep_repo):
        """A service built with db_path=None reports no stale artifacts."""
        service_without_db = PromptQueryService(
            artifact_repo, dep_repo, db_path=None
        )
        assert service_without_db.get_stale_artifacts() == []
class TestDependencyStats:
    """Exercise PromptQueryService.get_dependency_stats."""

    def test_empty_graph(self, query_service):
        """An empty graph has zero nodes, zero edges and no cycles."""
        summary = query_service.get_dependency_stats()

        assert (summary["total_nodes"], summary["total_edges"]) == (0, 0)
        assert summary["has_cycles"] is False

    def test_simple_graph(self, query_service, artifact_repo, dep_repo):
        """A three-node chain a -> b -> c yields the expected counters."""
        first, middle, last = (
            _make_artifact(artifact_repo, "s", label) for label in ("a", "b", "c")
        )
        for upstream, downstream in ((first, middle), (middle, last)):
            _make_edge(dep_repo, upstream.id, downstream.id, "r1")

        summary = query_service.get_dependency_stats()

        assert summary["total_nodes"] == 3
        assert summary["total_edges"] == 2
        # 'a' is the only root and 'c' the only leaf of the chain.
        assert summary["root_count"] == 1
        assert summary["leaf_count"] == 1
        assert summary["has_cycles"] is False
class TestFindArtifactsByDigest:
    """Exercise PromptQueryService.find_artifacts_by_digest."""

    def test_finds_matching(self, query_service, artifact_repo):
        """A stored artifact is found through its own content digest."""
        stored = _make_artifact(artifact_repo, "s", "test-art", "hello")

        found = query_service.find_artifacts_by_digest(stored.content_digest)

        assert len(found) == 1
        assert found[0]["artifact_id"] == stored.id

    def test_no_match(self, query_service):
        """An unknown digest yields an empty list."""
        found = query_service.find_artifacts_by_digest("nonexistent")
        assert found == []

    def test_multiple_matches(self, query_service, artifact_repo):
        """Artifacts with the same content in three spaces are all found."""
        created = [
            _make_artifact(artifact_repo, space, "a", "same-content")
            for space in ("s1", "s2", "s3")
        ]

        found = query_service.find_artifacts_by_digest(created[-1].content_digest)

        assert len(found) == 3

View File

@@ -0,0 +1,356 @@
"""
Unit tests for TraceabilityService.
Tests trace_artifact, get_producing_run, get_input_artifacts,
get_output_artifacts, get_generator_runs, get_validation_history,
get_impact_debt, and the RunSummary and ArtifactLineage models.
"""
import pytest
import tempfile
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock
from markitect.prompts.dependencies.models import DependencyEdge, EdgeType
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
from markitect.prompts.execution.models import (
ExecutionStage,
PromptRun,
RunConfig,
RunStatus,
)
from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.traceability.models import (
ArtifactLineage,
ProvenanceTrace,
RunSummary,
)
from markitect.prompts.traceability.service import TraceabilityService
@pytest.fixture
def temp_db():
    """Provide a temporary ``.db`` file path; the file is deleted on teardown."""
    scratch = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with scratch:
        # Leaving the ``with`` closes the handle; only the name is kept.
        scratch_path = scratch.name
    yield scratch_path
    # delete=False above means cleanup is this fixture's responsibility.
    Path(scratch_path).unlink(missing_ok=True)
@pytest.fixture
def artifact_repo(temp_db):
    """Build a SQLiteArtifactRepository on the temporary database path."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository
@pytest.fixture
def dep_repo(temp_db):
    """Build a SQLiteDependencyRepository on the temporary database path."""
    repository = SQLiteDependencyRepository(temp_db)
    return repository
@pytest.fixture
def service(artifact_repo, dep_repo, temp_db):
    """Build the TraceabilityService under test from both repositories."""
    service_under_test = TraceabilityService(
        artifact_repo,
        dep_repo,
        db_path=temp_db,
    )
    return service_under_test
def _make_artifact(repo, space_id, name, content="content", atype=ArtifactType.CONTENT):
    """Create an artifact of type ``atype`` and persist it through ``repo``.

    Returns whatever ``repo.create`` returns for the new artifact.
    """
    fields = {
        "space_id": space_id,
        "name": name,
        "content": content,
        "artifact_type": atype,
    }
    return repo.create(Artifact.create(**fields))
def _make_run(template_id, status=RunStatus.SUCCESS, parent_run_id=None, depth=0):
    """Create a PromptRun for tests and move it into the requested status.

    Args:
        template_id: Template identifier handed to ``PromptRun.create``.
        status: ``RunStatus.SUCCESS`` marks the run complete and
            ``RunStatus.FAILED`` marks it failed with the message "error";
            any other value leaves the run exactly as ``PromptRun.create``
            returned it.
        parent_run_id: Optional id of the parent run, forwarded unchanged.
        depth: Nesting depth, forwarded unchanged.

    Returns:
        The newly created run.
    """
    run = PromptRun.create(
        template_id=template_id,
        input_bundle_hash="hash-abc",
        parent_run_id=parent_run_id,
        depth=depth,
    )
    if status == RunStatus.SUCCESS:
        run.mark_complete()
    elif status == RunStatus.FAILED:
        # Honour an explicit FAILED request instead of silently returning a
        # run that was never marked failed.
        run.mark_failed("error")
    return run
def _make_edge(repo, source, target, run_id, edge_type=EdgeType.REQUIRES):
    """Create an edge of ``edge_type`` from ``source`` to ``target`` and persist it.

    Returns whatever ``repo.create`` returns for the new edge.
    """
    fields = {
        "source_artifact_id": source,
        "target_artifact_id": target,
        "run_id": run_id,
        "edge_type": edge_type,
    }
    return repo.create(DependencyEdge.create(**fields))
class TestTraceArtifact:
    """Exercise TraceabilityService.trace_artifact."""

    def test_trace_returns_provenance(self, service, artifact_repo, dep_repo):
        """The result is a ProvenanceTrace carrying the requested artifact id."""
        subject = _make_artifact(artifact_repo, "space-1", "output-doc")

        result = service.trace_artifact(subject.id)

        assert isinstance(result, ProvenanceTrace)
        assert result.artifact_id == subject.id

    def test_trace_with_producing_run(self, service, artifact_repo, dep_repo):
        """A run whose manifest lists the artifact as output is reported."""
        template_artifact = _make_artifact(
            artifact_repo, "space-1", "tmpl", atype=ArtifactType.TEMPLATE
        )
        produced = _make_artifact(artifact_repo, "space-1", "output")
        producing = _make_run(template_artifact.id)
        producing.metadata["manifest"] = {
            "output_artifacts": [{"artifact_id": produced.id}],
            "resolved_inputs": [],
        }
        service.register_run(producing)

        result = service.trace_artifact(produced.id)

        assert result.producing_run is not None
        assert result.producing_run.run_id == producing.id

    def test_trace_no_producing_run(self, service, artifact_repo):
        """Without any registered run, producing_run is None."""
        subject = _make_artifact(artifact_repo, "space-1", "standalone")

        result = service.trace_artifact(subject.id)

        assert result.producing_run is None

    def test_trace_includes_dependency_chain(self, service, artifact_repo, dep_repo):
        """Both direct and transitive dependencies appear in the chain."""
        first, middle, last = (
            _make_artifact(artifact_repo, "s", label) for label in ("a", "b", "c")
        )
        for upstream, downstream in ((first, middle), (middle, last)):
            _make_edge(dep_repo, upstream.id, downstream.id, "r1")

        chain = service.trace_artifact(first.id).dependency_chain

        assert middle.id in chain
        assert last.id in chain

    def test_trace_to_dict(self, service, artifact_repo):
        """The serialized trace exposes the expected top-level keys."""
        subject = _make_artifact(artifact_repo, "s", "x")

        serialized = service.trace_artifact(subject.id).to_dict()

        assert serialized["artifact_id"] == subject.id
        for key in ("producing_run", "input_artifacts", "dependency_chain"):
            assert key in serialized
class TestGetProducingRun:
    """Exercise TraceabilityService.get_producing_run."""

    def test_finds_run_via_generates_edge(self, service, artifact_repo, dep_repo):
        """A GENERATES edge from the run id to the artifact identifies the run."""
        produced = _make_artifact(artifact_repo, "s", "output")
        producing = _make_run("tmpl-1")
        service.register_run(producing)
        # The edge's source is the run id itself: run.id -> output.id.
        _make_edge(
            dep_repo, producing.id, produced.id, producing.id, EdgeType.GENERATES
        )

        found = service.get_producing_run(produced.id)

        assert found is not None
        assert found.run_id == producing.id

    def test_finds_run_via_manifest(self, service, artifact_repo):
        """A manifest listing the artifact under output_artifacts identifies the run."""
        produced = _make_artifact(artifact_repo, "s", "output")
        producing = _make_run("tmpl-1")
        producing.metadata["manifest"] = {
            "output_artifacts": [{"artifact_id": produced.id}],
        }
        service.register_run(producing)

        found = service.get_producing_run(produced.id)

        assert found is not None
        assert found.run_id == producing.id

    def test_returns_none_when_not_found(self, service):
        """An unknown artifact id yields None."""
        found = service.get_producing_run("nonexistent")
        assert found is None
class TestGetInputArtifacts:
    """Exercise TraceabilityService.get_input_artifacts."""

    def test_finds_inputs_via_edges(self, service, artifact_repo, dep_repo):
        """A REQUIRES edge from an artifact to the run id marks it as input."""
        source_artifact = _make_artifact(artifact_repo, "s", "input-data")
        consumer = _make_run("tmpl-1")
        service.register_run(consumer)
        _make_edge(
            dep_repo, source_artifact.id, consumer.id, consumer.id, EdgeType.REQUIRES
        )

        found = service.get_input_artifacts(consumer.id)

        assert len(found) == 1
        only = found[0]
        assert only.artifact_id == source_artifact.id
        assert only.role == "input"

    def test_finds_inputs_via_manifest(self, service, artifact_repo, dep_repo):
        """An artifact listed under the manifest's resolved_inputs is reported."""
        source_artifact = _make_artifact(artifact_repo, "s", "input-data")
        consumer = _make_run("tmpl-1")
        consumer.metadata["manifest"] = {
            "resolved_inputs": [{"artifact_id": source_artifact.id}],
            "output_artifacts": [],
        }
        service.register_run(consumer)

        found = service.get_input_artifacts(consumer.id)

        assert len(found) == 1
        assert found[0].artifact_id == source_artifact.id

    def test_no_duplicates(self, service, artifact_repo, dep_repo):
        """An input known from both manifest and edge is reported once."""
        source_artifact = _make_artifact(artifact_repo, "s", "input-data")
        consumer = _make_run("tmpl-1")
        consumer.metadata["manifest"] = {
            "resolved_inputs": [{"artifact_id": source_artifact.id}],
            "output_artifacts": [],
        }
        service.register_run(consumer)
        _make_edge(
            dep_repo, source_artifact.id, consumer.id, consumer.id, EdgeType.REQUIRES
        )

        assert len(service.get_input_artifacts(consumer.id)) == 1
class TestGetOutputArtifacts:
    """Exercise TraceabilityService.get_output_artifacts."""

    def test_finds_outputs_via_edges(self, service, artifact_repo, dep_repo):
        """A GENERATES edge from the run id to an artifact marks it as output."""
        produced = _make_artifact(artifact_repo, "s", "output")
        producing = _make_run("tmpl-1")
        service.register_run(producing)
        _make_edge(
            dep_repo, producing.id, produced.id, producing.id, EdgeType.GENERATES
        )

        found = service.get_output_artifacts(producing.id)

        assert len(found) == 1
        only = found[0]
        assert only.artifact_id == produced.id
        assert only.role == "output"
class TestGetGeneratorRuns:
    """Exercise TraceabilityService.get_generator_runs."""

    def test_finds_child_runs(self, service):
        """Runs whose parent_run_id is the parent are returned; others are not."""
        parent = _make_run("tmpl-1")
        first_child = _make_run("tmpl-2", parent_run_id=parent.id, depth=1)
        second_child = _make_run("tmpl-3", parent_run_id=parent.id, depth=1)
        outsider = _make_run("tmpl-4")
        for candidate in (parent, first_child, second_child, outsider):
            service.register_run(candidate)

        found_ids = {entry.run_id for entry in service.get_generator_runs(parent.id)}

        assert {first_child.id, second_child.id} <= found_ids
        assert outsider.id not in found_ids

    def test_no_children(self, service):
        """A run without children yields an empty list."""
        parent = _make_run("tmpl-1")
        service.register_run(parent)

        children = service.get_generator_runs(parent.id)

        assert children == []
class TestGetValidationHistory:
    """Exercise TraceabilityService.get_validation_history."""

    def test_returns_empty_without_db(self, artifact_repo, dep_repo):
        """A service built with db_path=None returns an empty history."""
        service_without_db = TraceabilityService(
            artifact_repo, dep_repo, db_path=None
        )
        assert service_without_db.get_validation_history("art-1") == []

    def test_returns_results_with_db(self, service):
        """With a db_path but no stored validation data the history is empty."""
        # Nothing has been written for "art-1", so an empty list is expected.
        history = service.get_validation_history("art-1")
        assert history == []
class TestGetImpactDebt:
    """Exercise TraceabilityService.get_impact_debt."""

    def test_returns_empty_without_db(self, artifact_repo, dep_repo):
        """A service built with db_path=None reports no debt."""
        service_without_db = TraceabilityService(
            artifact_repo, dep_repo, db_path=None
        )
        assert service_without_db.get_impact_debt("art-1") == []

    def test_returns_empty_no_debt(self, service):
        """An artifact id with no recorded debt yields an empty list."""
        debt = service.get_impact_debt("nonexistent")
        assert debt == []
class TestRunSummary:
    """Exercise the RunSummary model: construction and to_dict output."""

    def test_create_and_to_dict(self):
        """A fully populated summary serializes its id, status and completion time."""
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # switch to an aware timestamp once RunSummary is confirmed to accept one.
        timestamp = datetime.utcnow()
        fields = {
            "run_id": "r1",
            "template_id": "t1",
            "status": "success",
            "stage": "complete",
            "input_bundle_hash": "hash",
            "started_at": timestamp,
            "completed_at": timestamp,
        }

        serialized = RunSummary.create(**fields).to_dict()

        assert serialized["run_id"] == "r1"
        assert serialized["status"] == "success"
        assert serialized["completed_at"] is not None

    def test_optional_fields(self):
        """Omitted optional fields serialize as None, and depth as 0."""
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # switch to an aware timestamp once RunSummary is confirmed to accept one.
        timestamp = datetime.utcnow()
        fields = {
            "run_id": "r1",
            "template_id": "t1",
            "status": "pending",
            "stage": "pending",
            "input_bundle_hash": "hash",
            "started_at": timestamp,
        }

        serialized = RunSummary.create(**fields).to_dict()

        assert serialized["parent_run_id"] is None
        assert serialized["completed_at"] is None
        assert serialized["depth"] == 0
class TestArtifactLineage:
    """Exercise the ArtifactLineage model's to_dict output."""

    def test_to_dict(self):
        """The serialized lineage keeps its artifact_id and role."""
        fields = {
            "artifact_id": "a1",
            "name": "test",
            "space_id": "s1",
            "artifact_type": "content",
            "content_digest": "abc123",
            "role": "input",
        }

        serialized = ArtifactLineage(**fields).to_dict()

        assert serialized["artifact_id"] == "a1"
        assert serialized["role"] == "input"

View File

@@ -0,0 +1,141 @@
"""
Unit tests for GraphExporter.
Tests DOT and Mermaid export with various graph shapes.
"""
import pytest
from markitect.prompts.dependencies.models import DependencyGraph, EdgeType
from markitect.prompts.visualization.graph import GraphExporter
@pytest.fixture
def empty_graph():
    """Provide a freshly constructed DependencyGraph with nothing added."""
    graph = DependencyGraph()
    return graph
@pytest.fixture
def single_node_graph():
    """Provide a graph holding only the isolated node "node-a"."""
    graph = DependencyGraph()
    # Register the node directly in both adjacency maps with no neighbours.
    for adjacency in (graph._forward, graph._reverse):
        adjacency["node-a"] = set()
    return graph
@pytest.fixture
def linear_graph():
    """Provide the chain A -> B (requires) -> C (generates)."""
    links = (
        ("A", "B", EdgeType.REQUIRES),
        ("B", "C", EdgeType.GENERATES),
    )
    graph = DependencyGraph()
    for source, target, kind in links:
        graph.add_edge(source, target, kind)
    return graph
@pytest.fixture
def diamond_graph():
    """Provide the diamond A -> B, A -> C, B -> D, C -> D."""
    links = (
        ("A", "B", EdgeType.REQUIRES),
        ("A", "C", EdgeType.REQUIRES),
        ("B", "D", EdgeType.GENERATES),
        ("C", "D", EdgeType.INCLUDES),
    )
    graph = DependencyGraph()
    for source, target, kind in links:
        graph.add_edge(source, target, kind)
    return graph
class TestToDot:
    """Exercise GraphExporter.to_dot on several graph shapes."""

    def test_empty_graph(self, empty_graph):
        """An empty graph still yields the digraph header and layout direction."""
        rendered = GraphExporter.to_dot(empty_graph)

        for fragment in ('digraph "Dependencies"', "rankdir=LR"):
            assert fragment in rendered

    def test_single_node(self, single_node_graph):
        """The isolated node appears as "node_a" with its original name as label."""
        rendered = GraphExporter.to_dot(single_node_graph)

        for fragment in ("node_a", 'label="node-a"'):
            assert fragment in rendered

    def test_linear_graph(self, linear_graph):
        """Both chain edges and both edge-type labels are present."""
        rendered = GraphExporter.to_dot(linear_graph)

        expected = ("A -> B", "B -> C", 'label="requires"', 'label="generates"')
        for fragment in expected:
            assert fragment in rendered

    def test_diamond_graph(self, diamond_graph):
        """All four diamond edges are present."""
        rendered = GraphExporter.to_dot(diamond_graph)

        for fragment in ("A -> B", "A -> C", "B -> D", "C -> D"):
            assert fragment in rendered

    def test_custom_title(self, linear_graph):
        """A custom title is used for both the digraph name and the label."""
        rendered = GraphExporter.to_dot(linear_graph, title="My Graph")

        for fragment in ('digraph "My Graph"', 'label="My Graph"'):
            assert fragment in rendered

    def test_edge_styles(self, linear_graph):
        """REQUIRES edges are drawn solid and GENERATES edges dashed."""
        rendered = GraphExporter.to_dot(linear_graph)

        assert 'style="solid"' in rendered  # REQUIRES
        assert 'style="dashed"' in rendered  # GENERATES

    def test_dot_is_valid_structure(self, diamond_graph):
        """The output opens with ``digraph`` and closes with a brace."""
        rendered = GraphExporter.to_dot(diamond_graph)

        assert rendered.startswith('digraph')
        assert rendered.endswith("}")
class TestToMermaid:
    """Exercise GraphExporter.to_mermaid on several graph shapes."""

    def test_empty_graph(self, empty_graph):
        """An empty graph still yields the ``graph LR`` directive."""
        rendered = GraphExporter.to_mermaid(empty_graph)
        assert "graph LR" in rendered

    def test_single_node(self, single_node_graph):
        """The isolated node's name appears in the output."""
        rendered = GraphExporter.to_mermaid(single_node_graph)
        assert "node-a" in rendered

    def test_linear_graph(self, linear_graph):
        """Both chain edges are rendered with their type-specific arrows."""
        rendered = GraphExporter.to_mermaid(linear_graph)

        for fragment in ("A-->|requires|B", "B-.->|generates|C"):
            assert fragment in rendered

    def test_diamond_graph(self, diamond_graph):
        """All four diamond edges are rendered with their type-specific arrows."""
        rendered = GraphExporter.to_mermaid(diamond_graph)

        expected = (
            "A-->|requires|B",
            "A-->|requires|C",
            "B-.->|generates|D",
            "C==>|includes|D",
        )
        for fragment in expected:
            assert fragment in rendered

    def test_custom_title(self, linear_graph):
        """A custom title shows up somewhere in the output."""
        rendered = GraphExporter.to_mermaid(linear_graph, title="Build Graph")
        assert "Build Graph" in rendered

    def test_edge_arrows(self, diamond_graph):
        """Each edge type contributes its own arrow syntax."""
        rendered = GraphExporter.to_mermaid(diamond_graph)

        assert "-->" in rendered  # REQUIRES
        assert "-.->" in rendered  # GENERATES
        assert "==>" in rendered  # INCLUDES

    def test_mermaid_starts_with_graph(self, linear_graph):
        """The graph directive sits on the second line of the stripped output."""
        rendered = GraphExporter.to_mermaid(linear_graph)

        second_line = rendered.strip().split("\n")[1]

        assert "graph LR" in second_line