Complete implementation of Phase 8, the final phase of prompt dependency resolution infrastructure, adding full observability and traceability.

## Features (FR-11)

### FR-11.1: Complete Artifact Provenance Tracing

- TraceabilityService: composition layer for full artifact lineage
- Trace any artifact to producing PromptTemplate, input artifacts, generator runs, and quality validation results
- ProvenanceTrace model with complete dependency chain reconstruction
- RunSummary and ArtifactLineage models for structured trace output

### FR-11.2: Recomputation Query Infrastructure

- PromptQueryService: cross-service complex queries
- Run history queries with template and status filters
- Stale artifact detection via impact debt analysis
- Dependency graph statistics (nodes, edges, cycles, roots, leaves)
- Content-based artifact lookups by digest

### Visualization Support

- GraphExporter: DOT (Graphviz) and Mermaid format export
- Supports all edge types (requires, generates, includes)
- Handles isolated nodes, linear chains, diamonds, and complex graphs

### CLI Commands (prompt group)

- `prompt trace <artifact_id>` - Full provenance trace as JSON
- `prompt graph <artifact_id>` - Dependency graph (DOT/Mermaid)
- `prompt runs` - List execution runs with filters
- `prompt debt` - Show impact debt and stale artifacts
- `prompt stats` - Dependency graph statistics

## Implementation

Source files (8):

- markitect/prompts/traceability/models.py - Trace data models
- markitect/prompts/traceability/service.py - TraceabilityService
- markitect/prompts/visualization/graph.py - Graph export
- markitect/prompts/queries/operations.py - PromptQueryService
- markitect/prompts/cli.py - Click CLI commands
- Package __init__.py files (3)

Tests (64 total, all passing):

- tests/unit/prompts/test_traceability_service.py (21 tests)
- tests/unit/prompts/test_visualization.py (14 tests)
- tests/unit/prompts/test_query_operations.py (12 tests)
- tests/integration/prompts/test_traceability_workflow.py (7 tests)
- tests/integration/prompts/test_prompt_cli.py (10 tests)

## Architecture

TraceabilityService is a composition layer that delegates to:

- DependencyQueryService (transitive dependency lookups)
- QualityValidator (validation history)
- IncrementalExecutionEngine (impact debt queries)
- Direct repository access (artifacts, edges)

No duplicate data storage - all data comes from existing Phase 1-7 infrastructure (artifact repo, dependency repo, validation DB, debt DB).

## Verification

All 2250 tests pass with 0 regressions. Phase 8 completes the full 8-phase implementation roadmap.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
316 lines
12 KiB
Python
316 lines
12 KiB
Python
"""
Traceability service for artifact provenance tracking.

Implements FR-11.1: Trace any artifact to its producing PromptTemplate,
input artifacts, generator runs, and quality validation results.

Implements FR-11.2: Enable recomputation based on dependency changes.

Composition layer over existing services — does NOT duplicate data storage.
"""
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from markitect.prompts.dependencies.graph import GraphBuilder
|
|
from markitect.prompts.dependencies.queries import DependencyQueryService
|
|
from markitect.prompts.dependencies.repository import IDependencyRepository
|
|
from markitect.prompts.execution.models import PromptRun
|
|
from markitect.prompts.incremental.engine import IncrementalExecutionEngine
|
|
from markitect.prompts.quality.validator import QualityValidator
|
|
from markitect.prompts.repositories.interfaces import IArtifactRepository
|
|
from markitect.prompts.traceability.models import (
|
|
ArtifactLineage,
|
|
ProvenanceTrace,
|
|
RunSummary,
|
|
)
|
|
|
|
|
|
def _run_to_summary(run: PromptRun) -> RunSummary:
    """
    Build the RunSummary view of a PromptRun.

    The run's status and stage are passed on as their ``.value``; every
    other field is copied across unchanged.

    Args:
        run: PromptRun to summarize

    Returns:
        RunSummary produced by ``RunSummary.create``
    """
    summary_fields = {
        "run_id": run.id,
        "template_id": run.template_id,
        "status": run.status.value,
        "stage": run.stage.value,
        "input_bundle_hash": run.input_bundle_hash,
        "started_at": run.started_at,
        "parent_run_id": run.parent_run_id,
        "depth": run.depth,
        "completed_at": run.completed_at,
    }
    return RunSummary.create(**summary_fields)
|
|
|
|
|
|
class TraceabilityService:
    """
    Composition layer for full artifact provenance tracing.

    Delegates to DependencyQueryService, QualityValidator,
    IncrementalExecutionEngine, and direct repository access.

    Runs are not loaded from storage by this class: callers hand them over
    through ``register_run`` and they are kept in an in-memory registry
    keyed by run id.
    """

    def __init__(
        self,
        artifact_repo: IArtifactRepository,
        dependency_repo: IDependencyRepository,
        db_path: Optional[str] = None,
    ):
        """
        Compose over existing repos and services.

        Args:
            artifact_repo: Repository for artifact lookups
            dependency_repo: Repository for dependency edge lookups
            db_path: Optional database path for quality/debt services.
                When omitted (or empty), no validator or engine is created
                and validation history / impact debt come back empty.
        """
        self._artifact_repo = artifact_repo
        self._dependency_repo = dependency_repo
        self._db_path = db_path
        self._query_service = DependencyQueryService(dependency_repo)
        self._graph_builder = GraphBuilder(dependency_repo)
        self._validator = QualityValidator(db_path=db_path) if db_path else None
        self._engine = (
            IncrementalExecutionEngine(db_path, self._query_service)
            if db_path
            else None
        )
        # Run registry: external code can register runs for tracing
        self._runs: Dict[str, PromptRun] = {}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _lineage_for(artifact: Any, role: str) -> ArtifactLineage:
        """Build the ArtifactLineage entry for a repository artifact."""
        return ArtifactLineage(
            artifact_id=artifact.id,
            name=artifact.name,
            space_id=artifact.space_id,
            artifact_type=artifact.artifact_type.value,
            content_digest=artifact.content_digest,
            role=role,
        )

    @staticmethod
    def _manifest_of(run: Any) -> Dict[str, Any]:
        """Return the run's ``metadata["manifest"]``, or ``{}`` when absent or None."""
        metadata = getattr(run, "metadata", None) or {}
        return metadata.get("manifest") or {}

    def _manifest_artifact_ids(self, run_id: str, key: str) -> List[str]:
        """List the ``artifact_id`` values under manifest ``key`` for a registered run."""
        run = self._runs.get(run_id)
        if not run:
            return []
        entries = self._manifest_of(run).get(key) or []
        return [entry.get("artifact_id", "") for entry in entries]

    def _collect_lineage(
        self, artifact_ids: List[str], role: str
    ) -> List[ArtifactLineage]:
        """Resolve ids to lineage entries, skipping blank, repeated, and unknown ids."""
        seen = set()
        lineage: List[ArtifactLineage] = []
        for aid in artifact_ids:
            if not aid or aid in seen:
                continue
            artifact = self._artifact_repo.get_by_id(aid)
            if not artifact:
                continue
            lineage.append(self._lineage_for(artifact, role))
            # Track both the requested id and the id the repository reports,
            # so the same artifact is never listed twice.
            seen.update((aid, artifact.id))
        return lineage

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def register_run(self, run: PromptRun) -> None:
        """
        Register a run for traceability lookups.

        A run registered under an id that is already present replaces the
        earlier entry.

        Args:
            run: PromptRun to register
        """
        self._runs[run.id] = run

    def trace_artifact(self, artifact_id: str) -> ProvenanceTrace:
        """
        Full provenance trace for an artifact (FR-11.1).

        Template, input/output artifacts, and generator runs are only
        filled in when a producing run is found. Validation history,
        impact debt, and the dependency chain are always looked up.

        Args:
            artifact_id: Artifact to trace

        Returns:
            ProvenanceTrace with all provenance data
        """
        trace = ProvenanceTrace(artifact_id=artifact_id)

        # Find producing run
        producing_run = self.get_producing_run(artifact_id)
        trace.producing_run = producing_run

        if producing_run:
            # The template is itself looked up as an artifact by its id
            template_artifact = self._artifact_repo.get_by_id(
                producing_run.template_id
            )
            if template_artifact:
                trace.template = self._lineage_for(template_artifact, "template")

            # Get input and output artifacts
            trace.input_artifacts = self.get_input_artifacts(producing_run.run_id)
            trace.output_artifacts = self.get_output_artifacts(producing_run.run_id)

            # Get generator sub-runs
            trace.generator_runs = self.get_generator_runs(producing_run.run_id)

        # Get validation history
        trace.validation_results = self.get_validation_history(artifact_id)

        # Get impact debt
        trace.impact_debt = self.get_impact_debt(artifact_id)

        # Build dependency chain (sorted for a stable ordering)
        deps = self._query_service.find_transitive_dependencies(artifact_id)
        trace.dependency_chain = sorted(deps)

        return trace

    def get_producing_run(self, artifact_id: str) -> Optional[RunSummary]:
        """
        Find the run that produced an artifact.

        Two strategies are tried in order:

        1. Dependency edges targeting the artifact: the first edge whose
           source id matches a registered run id wins.
        2. Registered runs whose manifest lists the artifact under
           ``output_artifacts``.

        Args:
            artifact_id: Artifact to find producer of

        Returns:
            RunSummary if found, None otherwise
        """
        # NOTE(review): edges are not filtered by edge type here, although
        # the intent appears to be "generates" edges only — confirm before
        # tightening, since that would change which run is returned.
        for edge in self._dependency_repo.get_by_target(artifact_id):
            run = self._runs.get(edge.source_artifact_id)
            if run:
                return _run_to_summary(run)

        # Fallback: search registered runs by manifest metadata
        for run in self._runs.values():
            outputs = self._manifest_of(run).get("output_artifacts") or []
            if any(out.get("artifact_id") == artifact_id for out in outputs):
                return _run_to_summary(run)

        return None

    def get_input_artifacts(self, run_id: str) -> List[ArtifactLineage]:
        """
        Get all input artifacts for a run.

        Combines the sources of the run's "requires" dependency edges with
        the ``resolved_inputs`` of the registered run's manifest. Each
        artifact appears once; ids the artifact repository cannot resolve
        are skipped.

        Args:
            run_id: Run identifier

        Returns:
            List of ArtifactLineage for inputs
        """
        edge_ids = [
            edge.source_artifact_id
            for edge in self._dependency_repo.get_by_run(run_id)
            if edge.edge_type.value == "requires"
        ]
        manifest_ids = self._manifest_artifact_ids(run_id, "resolved_inputs")
        return self._collect_lineage(edge_ids + manifest_ids, "input")

    def get_output_artifacts(self, run_id: str) -> List[ArtifactLineage]:
        """
        Get all output artifacts produced by a run.

        Combines the targets of the run's "generates" dependency edges with
        the ``output_artifacts`` of the registered run's manifest. Each
        artifact appears once; ids the artifact repository cannot resolve
        are skipped.

        Args:
            run_id: Run identifier

        Returns:
            List of ArtifactLineage for outputs
        """
        edge_ids = [
            edge.target_artifact_id
            for edge in self._dependency_repo.get_by_run(run_id)
            if edge.edge_type.value == "generates"
        ]
        manifest_ids = self._manifest_artifact_ids(run_id, "output_artifacts")
        return self._collect_lineage(edge_ids + manifest_ids, "output")

    def get_generator_runs(self, run_id: str) -> List[RunSummary]:
        """
        Get nested generator runs spawned by a run.

        Only direct children among the registered runs are returned (runs
        whose ``parent_run_id`` equals ``run_id``).

        Args:
            run_id: Parent run identifier

        Returns:
            List of RunSummary for child runs
        """
        return [
            _run_to_summary(run)
            for run in self._runs.values()
            if run.parent_run_id == run_id
        ]

    def get_validation_history(self, artifact_id: str) -> List[Dict[str, Any]]:
        """
        Get validation results for an artifact across all runs.

        Args:
            artifact_id: Artifact identifier

        Returns:
            List of validation result dictionaries; empty when no
            validator is configured
        """
        if not self._validator:
            return []
        return self._validator.get_results_for_artifact(artifact_id)

    def get_impact_debt(self, artifact_id: str) -> List[Dict[str, Any]]:
        """
        Get suppressed recomputation records for an artifact.

        Args:
            artifact_id: Artifact identifier

        Returns:
            List of impact debt dictionaries; empty when no engine is
            configured
        """
        if not self._engine:
            return []
        return [
            debt.to_dict()
            for debt in self._engine.get_debt_for_artifact(artifact_id)
        ]
|