Files
markitect-main/markitect/prompts/execution/engine.py
tegwick c56c92c815
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(prompts): implement Phase 4 - Execution Engine (FR-4, FR-5)
Implement three-stage execution lifecycle with idempotent runs and complete
provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt produced during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 23:15:33 +01:00

269 lines
8.6 KiB
Python

"""
Prompt execution engine.
Implements FR-4: PromptRun Lifecycle
Three-stage execution with idempotent runs.
"""
import time
from typing import Optional, Dict
from markitect.prompts.templates.models import PromptTemplate
from markitect.prompts.templates.analyzer import TemplateAnalyzer
from markitect.prompts.resolver.resolver import PromptResolver
from markitect.prompts.resolver.compiler import ContextCompiler
from markitect.prompts.resolver.strategy import ResolutionConfig
from markitect.prompts.execution.models import (
PromptRun,
ExecutionStage,
RunConfig,
RunStatus,
InputBundle,
)
from markitect.prompts.execution.manifest import RunManifest
from markitect.prompts.execution.llm_adapter import LLMAdapter
from markitect.prompts.models import calculate_content_digest, ArtifactType
from markitect.prompts.services.artifact_service import ArtifactService
class PromptExecutionEngine:
    """
    Engine for executing prompt templates.

    Implements FR-4: PromptRun Lifecycle
    - Stage 1: Template Analysis
    - Stage 2: Context Compilation
    - Stage 3: Prompt Processing

    Implements FR-4.4: Idempotent execution via InputBundleHash.
    Completed runs are remembered in ``run_cache``, an in-memory dict held
    by this engine instance and keyed by input bundle hash.
    """

    def __init__(
        self,
        artifact_service: ArtifactService,
        template_analyzer: TemplateAnalyzer,
        resolver: PromptResolver,
        compiler: ContextCompiler,
        llm_adapter: LLMAdapter,
    ) -> None:
        """
        Initialize execution engine.

        Args:
            artifact_service: For artifact operations
            template_analyzer: For template analysis
            resolver: For macro resolution
            compiler: For context compilation
            llm_adapter: For LLM execution
        """
        self.artifact_service = artifact_service
        self.template_analyzer = template_analyzer
        self.resolver = resolver
        self.compiler = compiler
        self.llm_adapter = llm_adapter
        # Completed runs, keyed by input_bundle_hash (see execute()).
        self.run_cache: Dict[str, PromptRun] = {}

    def execute(
        self,
        template: PromptTemplate,
        template_content: str,
        resolution_config: ResolutionConfig,
        run_config: Optional[RunConfig] = None,
    ) -> PromptRun:
        """
        Execute a prompt template.

        Implements FR-4.1: Three-stage execution
        1. Analysis: Analyze template and extract macros
        2. Compilation: Resolve macros and compile prompt
        3. Processing: Execute with LLM

        Implements FR-4.4: Skip if identical InputBundleHash exists

        Exceptions raised by the analyzer, resolver or compiler propagate to
        the caller. Exceptions raised during stage 3 (LLM call, output
        artifact creation, manifest bookkeeping) are caught and reported
        through a failed run instead.

        Args:
            template: Template to execute
            template_content: Template content
            resolution_config: Resolution configuration
            run_config: Execution configuration; a default ``RunConfig()``
                is used when omitted

        Returns:
            PromptRun with execution results. The run is marked failed when
            resolution fails or stage 3 raises, and marked skipped when
            ``config.skip_if_exists`` is set and a run with the same input
            bundle hash is already cached.
        """
        config = RunConfig() if run_config is None else run_config

        # Stage 1: Template Analysis
        # perf_counter() is monotonic, so the measured durations cannot be
        # distorted by wall-clock adjustments the way time.time() deltas can.
        stage_start = time.perf_counter()
        if not template.analyzed:
            self.template_analyzer.analyze(template, template_content)
        analysis_time = time.perf_counter() - stage_start

        # Stage 2: Context Compilation
        stage_start = time.perf_counter()
        resolution_result = self.resolver.resolve_template(
            template, resolution_config
        )
        if not resolution_result.success:
            # Resolution failed - report it through a failed run.
            # Errors are stringified so a non-str entry cannot break join().
            error_text = ", ".join(
                str(err) for err in resolution_result.context.errors
            )
            return self._create_failed_run(
                template,
                "Resolution failed: " + error_text,
                config,
            )
        compiled = self.compiler.compile(
            template, template_content, resolution_result
        )
        compilation_time = time.perf_counter() - stage_start

        # Calculate InputBundleHash (FR-4.3)
        input_bundle_hash = self._calculate_input_bundle_hash(
            template, compiled, resolution_config, config
        )

        # Check for existing run (FR-4.4)
        if config.skip_if_exists and input_bundle_hash in self.run_cache:
            return self._create_skipped_run(template, input_bundle_hash, config)

        run = PromptRun.create(
            template_id=template.id,
            input_bundle_hash=input_bundle_hash,
            config=config,
        )
        manifest = self._build_manifest(
            run, template, compiled, resolution_result, config
        )
        manifest.set_timing("analysis", analysis_time)
        manifest.set_timing("compilation", compilation_time)

        # Stage 3: Prompt Processing
        run.advance_stage(ExecutionStage.PROCESSING)
        try:
            stage_start = time.perf_counter()
            llm_response = self.llm_adapter.execute_prompt(
                compiled.content,
                config,
            )
            # Only the LLM call itself is counted as processing time.
            manifest.set_timing("processing", time.perf_counter() - stage_start)

            # Store output as generated artifact
            output_artifact = self.artifact_service.create_artifact(
                space_id=template.space_id,
                name=f"{template.name}-output-{run.id[:8]}",
                content=llm_response.content,
                artifact_type=ArtifactType.GENERATED,
            )
            manifest.add_output_artifact(
                artifact_id=output_artifact.id,
                name=output_artifact.name,
                digest=output_artifact.content_digest,
                artifact_type=output_artifact.artifact_type.value,
            )
            # Generation edge: run -> output artifact
            manifest.add_dependency_edge(
                source_id=run.id,
                target_id=output_artifact.id,
                edge_type="generates",
            )

            run.mark_complete()
            run.metadata["manifest"] = manifest.to_dict()
            run.metadata["output_artifact_id"] = output_artifact.id

            # Only runs that reach this point are cached, so a failed run
            # never causes later executions to be skipped.
            self.run_cache[input_bundle_hash] = run
        except Exception as exc:
            # Boundary handler: stage 3 failures are reported via the run.
            # Fall back to the exception class name when the message is
            # empty so the failure is never recorded without a reason.
            run.mark_failed(str(exc) or type(exc).__name__)
            run.metadata["manifest"] = manifest.to_dict()
        return run

    def _calculate_input_bundle_hash(
        self,
        template: PromptTemplate,
        compiled,
        resolution_config: ResolutionConfig,
        config: RunConfig,
    ) -> str:
        """
        Build the InputBundle for this execution and return its hash (FR-4.3).

        The bundle combines the template digest, the compiled prompt's
        dependency digests, a digest of the resolution config and the model
        settings (model name, temperature, max tokens).

        Args:
            template: Template being executed
            compiled: Result of ``ContextCompiler.compile``
            resolution_config: Resolution configuration
            config: Execution configuration

        Returns:
            Value of ``InputBundle.calculate_hash()``
        """
        # NOTE(review): the digest is taken over str(dict), so it depends on
        # the key order produced by to_dict() - confirm that order is stable.
        resolution_config_hash = calculate_content_digest(
            str(resolution_config.to_dict())
        )
        input_bundle = InputBundle(
            template_digest=template.content_digest,
            dependency_digests=compiled.dependency_digests,
            resolution_config_hash=resolution_config_hash,
            model_config={
                "model_name": config.model_name,
                "temperature": config.temperature,
                "max_tokens": config.max_tokens,
            },
        )
        return input_bundle.calculate_hash()

    def _create_skipped_run(
        self,
        template: PromptTemplate,
        input_bundle_hash: str,
        config: RunConfig,
    ) -> PromptRun:
        """
        Create a skipped run that points at the cached run with the same hash.

        Args:
            template: Template
            input_bundle_hash: Hash that is already present in ``run_cache``
            config: Config

        Returns:
            Skipped PromptRun whose ``metadata["skipped_due_to"]`` holds the
            id of the cached run
        """
        existing_run = self.run_cache[input_bundle_hash]
        skipped_run = PromptRun.create(
            template_id=template.id,
            input_bundle_hash=input_bundle_hash,
            config=config,
        )
        skipped_run.mark_skipped()
        skipped_run.metadata["skipped_due_to"] = existing_run.id
        return skipped_run

    def _build_manifest(
        self,
        run: PromptRun,
        template: PromptTemplate,
        compiled,
        resolution_result,
        config: RunConfig,
    ) -> RunManifest:
        """
        Create the RunManifest for a run and record its inputs.

        Stores the compiled prompt digest and model configuration, then adds
        one resolved input plus one "requires" edge (artifact -> run) for
        every resolved macro that carries an artifact.

        Args:
            run: Run the manifest belongs to
            template: Template being executed
            compiled: Result of ``ContextCompiler.compile``
            resolution_result: Result of ``PromptResolver.resolve_template``
            config: Execution configuration

        Returns:
            Populated RunManifest (timings and outputs are added by the caller)
        """
        manifest = RunManifest.create(
            run_id=run.id,
            template_id=template.id,
            template_name=template.name,
            template_digest=template.content_digest,
        )
        manifest.compiled_prompt_digest = compiled.content_digest
        manifest.model_config = config.to_dict()

        for resolved in resolution_result.context.resolved_macros:
            artifact = resolved.artifact
            if not artifact:
                # Macros without an artifact contribute no manifest input.
                continue
            manifest.add_resolved_input(
                name=artifact.name,
                artifact_id=artifact.id,
                space_id=resolved.space_id or "",
                digest=artifact.content_digest,
            )
            manifest.add_dependency_edge(
                source_id=artifact.id,
                target_id=run.id,
                edge_type="requires",
            )
        return manifest

    def _create_failed_run(
        self,
        template: PromptTemplate,
        error: str,
        config: RunConfig,
    ) -> PromptRun:
        """
        Create a failed run.

        Used when execution stops before an input bundle hash exists, so the
        run carries the placeholder hash "failed".

        Args:
            template: Template
            error: Error message
            config: Config

        Returns:
            Failed PromptRun
        """
        run = PromptRun.create(
            template_id=template.id,
            input_bundle_hash="failed",
            config=config,
        )
        run.mark_failed(error)
        return run

    def get_run_by_hash(self, input_bundle_hash: str) -> Optional[PromptRun]:
        """
        Retrieve cached run by input bundle hash.

        Args:
            input_bundle_hash: Hash to lookup

        Returns:
            PromptRun if found, None otherwise
        """
        return self.run_cache.get(input_bundle_hash)

    def clear_cache(self) -> None:
        """Clear the run cache."""
        self.run_cache.clear()