feat(prompts): implement Phase 4 - Execution Engine (FR-4, FR-5)

Implement three-stage execution lifecycle with idempotent runs and complete provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 23:15:33 +01:00
parent 5f463e5b20
commit c56c92c815
8 changed files with 1739 additions and 0 deletions
--- a/markitect/prompts/execution/engine.py
+++ b/markitect/prompts/execution/engine.py
@@ -0,0 +1,268 @@
+"""
+Prompt execution engine.
+
+Implements FR-4: PromptRun Lifecycle
+Three-stage execution with idempotent runs.
+"""
+
+import time
+from typing import Optional, Dict
+
+from markitect.prompts.templates.models import PromptTemplate
+from markitect.prompts.templates.analyzer import TemplateAnalyzer
+from markitect.prompts.resolver.resolver import PromptResolver
+from markitect.prompts.resolver.compiler import ContextCompiler
+from markitect.prompts.resolver.strategy import ResolutionConfig
+from markitect.prompts.execution.models import (
+    PromptRun,
+    ExecutionStage,
+    RunConfig,
+    RunStatus,
+    InputBundle,
+)
+from markitect.prompts.execution.manifest import RunManifest
+from markitect.prompts.execution.llm_adapter import LLMAdapter
+from markitect.prompts.models import calculate_content_digest, ArtifactType
+from markitect.prompts.services.artifact_service import ArtifactService
+
+
+class PromptExecutionEngine:
+    """
+    Engine for executing prompt templates.
+
+    Implements FR-4: PromptRun Lifecycle
+    - Stage 1: Template Analysis
+    - Stage 2: Context Compilation
+    - Stage 3: Prompt Processing
+
+    Implements FR-4.4: Idempotent execution via InputBundleHash.
+    Completed runs are remembered in ``run_cache`` (an in-memory dict keyed
+    by input bundle hash); failed and skipped runs are never cached.
+    """
+
+    def __init__(
+        self,
+        artifact_service: ArtifactService,
+        template_analyzer: TemplateAnalyzer,
+        resolver: PromptResolver,
+        compiler: ContextCompiler,
+        llm_adapter: LLMAdapter,
+    ):
+        """
+        Initialize execution engine.
+
+        Args:
+            artifact_service: Used to store the LLM output as an artifact
+            template_analyzer: Used for stage 1 (template analysis)
+            resolver: Used for stage 2 (macro resolution)
+            compiler: Used for stage 2 (context compilation)
+            llm_adapter: Used for stage 3 (LLM execution)
+        """
+        self.artifact_service = artifact_service
+        self.template_analyzer = template_analyzer
+        self.resolver = resolver
+        self.compiler = compiler
+        self.llm_adapter = llm_adapter
+        # Completed runs keyed by input_bundle_hash; consulted for FR-4.4
+        self.run_cache: Dict[str, PromptRun] = {}
+
+    def execute(
+        self,
+        template: PromptTemplate,
+        template_content: str,
+        resolution_config: ResolutionConfig,
+        run_config: Optional[RunConfig] = None,
+    ) -> PromptRun:
+        """
+        Execute a prompt template.
+
+        Implements FR-4.1: Three-stage execution
+        1. Analysis: Analyze template (only when ``template.analyzed`` is falsy)
+        2. Compilation: Resolve macros and compile prompt
+        3. Processing: Execute with LLM and store the response as a
+           GENERATED artifact
+
+        Implements FR-4.4: Skip if identical InputBundleHash exists
+
+        Exceptions raised during stages 1 and 2 are not caught here and
+        propagate to the caller; exceptions raised during stage 3 are
+        converted into a failed run.
+
+        Args:
+            template: Template to execute
+            template_content: Template content
+            resolution_config: Resolution configuration
+            run_config: Execution configuration; a default ``RunConfig()``
+                is used when omitted
+
+        Returns:
+            PromptRun in one of three outcomes:
+            - complete: ``metadata["manifest"]`` and
+              ``metadata["output_artifact_id"]`` are set and the run is cached
+            - skipped: an identical hash is cached and
+              ``config.skip_if_exists`` is set; ``metadata["skipped_due_to"]``
+              holds the id of the cached run
+            - failed: macro resolution failed, or stage 3 raised (in which
+              case ``metadata["manifest"]`` holds the partial manifest)
+        """
+        config = run_config or RunConfig()
+
+        # Stage 1: Template Analysis
+        # perf_counter() is monotonic, so a measured duration cannot be
+        # distorted (or go negative) when the wall clock is adjusted.
+        stage_start = time.perf_counter()
+        if not template.analyzed:
+            self.template_analyzer.analyze(template, template_content)
+        analysis_time = time.perf_counter() - stage_start
+
+        # Stage 2: Context Compilation
+        stage_start = time.perf_counter()
+        resolution_result = self.resolver.resolve_template(template, resolution_config)
+
+        if not resolution_result.success:
+            # Resolution failed - create failed run. Entries are stringified
+            # so that non-str error objects cannot break the join.
+            error_text = ", ".join(
+                str(err) for err in resolution_result.context.errors
+            )
+            return self._create_failed_run(
+                template,
+                "Resolution failed: " + error_text,
+                config,
+            )
+
+        compiled = self.compiler.compile(template, template_content, resolution_result)
+        compilation_time = time.perf_counter() - stage_start
+
+        # Calculate InputBundleHash (FR-4.3)
+        input_bundle_hash = self._calculate_input_bundle_hash(
+            template, compiled, resolution_config, config
+        )
+
+        # Check for existing run (FR-4.4)
+        if config.skip_if_exists and input_bundle_hash in self.run_cache:
+            existing_run = self.run_cache[input_bundle_hash]
+            # Create skipped run referencing existing
+            skipped_run = PromptRun.create(
+                template_id=template.id,
+                input_bundle_hash=input_bundle_hash,
+                config=config,
+            )
+            skipped_run.mark_skipped()
+            skipped_run.metadata["skipped_due_to"] = existing_run.id
+            return skipped_run
+
+        # Create run
+        run = PromptRun.create(
+            template_id=template.id,
+            input_bundle_hash=input_bundle_hash,
+            config=config,
+        )
+
+        # Create manifest and record timing of the first two stages
+        manifest = self._build_manifest(
+            run, template, compiled, resolution_result, config
+        )
+        manifest.set_timing("analysis", analysis_time)
+        manifest.set_timing("compilation", compilation_time)
+
+        # Stage 3: Prompt Processing
+        run.advance_stage(ExecutionStage.PROCESSING)
+
+        # Run boundary: any exception from the LLM call, artifact storage or
+        # manifest bookkeeping is turned into a failed run rather than
+        # propagating, so the caller always gets a PromptRun from this stage.
+        try:
+            stage_start = time.perf_counter()
+            llm_response = self.llm_adapter.execute_prompt(
+                compiled.content,
+                config,
+            )
+            manifest.set_timing("processing", time.perf_counter() - stage_start)
+
+            # Store output as generated artifact
+            output_artifact = self.artifact_service.create_artifact(
+                space_id=template.space_id,
+                name=f"{template.name}-output-{run.id[:8]}",
+                content=llm_response.content,
+                artifact_type=ArtifactType.GENERATED,
+            )
+
+            manifest.add_output_artifact(
+                artifact_id=output_artifact.id,
+                name=output_artifact.name,
+                digest=output_artifact.content_digest,
+                artifact_type=output_artifact.artifact_type.value,
+            )
+
+            # Add generation edge
+            manifest.add_dependency_edge(
+                source_id=run.id,
+                target_id=output_artifact.id,
+                edge_type="generates",
+            )
+
+            # Mark complete
+            run.mark_complete()
+            run.metadata["manifest"] = manifest.to_dict()
+            run.metadata["output_artifact_id"] = output_artifact.id
+
+            # Cache run (only successful runs reach this point)
+            self.run_cache[input_bundle_hash] = run
+
+        except Exception as e:
+            run.mark_failed(str(e))
+            # Keep whatever provenance was collected before the failure
+            run.metadata["manifest"] = manifest.to_dict()
+
+        return run
+
+    def _calculate_input_bundle_hash(
+        self,
+        template: PromptTemplate,
+        compiled,
+        resolution_config: ResolutionConfig,
+        config: RunConfig,
+    ) -> str:
+        """
+        Calculate the InputBundleHash for one execution (FR-4.3).
+
+        The bundle combines the template digest, the dependency digests of
+        the compiled prompt, a digest of the resolution configuration and
+        the model settings (model name, temperature, max tokens).
+
+        Args:
+            template: Template being executed
+            compiled: Compiled prompt returned by the context compiler
+            resolution_config: Resolution configuration
+            config: Execution configuration
+
+        Returns:
+            Hash returned by ``InputBundle.calculate_hash()``
+        """
+        input_bundle = InputBundle(
+            template_digest=template.content_digest,
+            dependency_digests=compiled.dependency_digests,
+            resolution_config_hash=calculate_content_digest(
+                str(resolution_config.to_dict())
+            ),
+            model_config={
+                "model_name": config.model_name,
+                "temperature": config.temperature,
+                "max_tokens": config.max_tokens,
+            },
+        )
+        return input_bundle.calculate_hash()
+
+    def _build_manifest(
+        self,
+        run: PromptRun,
+        template: PromptTemplate,
+        compiled,
+        resolution_result,
+        config: RunConfig,
+    ) -> RunManifest:
+        """
+        Create the RunManifest for a run and record its resolved inputs.
+
+        Args:
+            run: Run the manifest belongs to
+            template: Template being executed
+            compiled: Compiled prompt returned by the context compiler
+            resolution_result: Successful result of macro resolution
+            config: Execution configuration
+
+        Returns:
+            RunManifest carrying template metadata, compiled prompt digest,
+            model configuration, resolved inputs and "requires" edges
+        """
+        manifest = RunManifest.create(
+            run_id=run.id,
+            template_id=template.id,
+            template_name=template.name,
+            template_digest=template.content_digest,
+        )
+        manifest.compiled_prompt_digest = compiled.content_digest
+        manifest.model_config = config.to_dict()
+
+        # Add resolved inputs to manifest; macros without an artifact are
+        # skipped because there is nothing to reference.
+        for resolved in resolution_result.context.resolved_macros:
+            if resolved.artifact:
+                manifest.add_resolved_input(
+                    name=resolved.artifact.name,
+                    artifact_id=resolved.artifact.id,
+                    space_id=resolved.space_id or "",
+                    digest=resolved.artifact.content_digest,
+                )
+                # Add dependency edge (input artifact -> run)
+                manifest.add_dependency_edge(
+                    source_id=resolved.artifact.id,
+                    target_id=run.id,
+                    edge_type="requires",
+                )
+
+        return manifest
+
+    def _create_failed_run(
+        self,
+        template: PromptTemplate,
+        error: str,
+        config: RunConfig,
+    ) -> PromptRun:
+        """
+        Create a failed run.
+
+        Used when execution stops before an input bundle hash could be
+        calculated, so the literal placeholder "failed" is stored as the
+        run's hash.
+
+        Args:
+            template: Template
+            error: Error message
+            config: Config
+
+        Returns:
+            Failed PromptRun
+        """
+        run = PromptRun.create(
+            template_id=template.id,
+            input_bundle_hash="failed",
+            config=config,
+        )
+        run.mark_failed(error)
+        return run
+
+    def get_run_by_hash(self, input_bundle_hash: str) -> Optional[PromptRun]:
+        """
+        Retrieve cached run by input bundle hash.
+
+        Args:
+            input_bundle_hash: Hash to lookup
+
+        Returns:
+            PromptRun if found, None otherwise
+        """
+        return self.run_cache.get(input_bundle_hash)
+
+    def clear_cache(self) -> None:
+        """Clear the run cache."""
+        self.run_cache.clear()