feat(prompts): implement Phase 4 - Execution Engine (FR-4, FR-5)
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Implement three-stage execution lifecycle with idempotent runs and complete provenance tracking via RunManifest. Core Features: - PromptRun model with execution lifecycle stages: 1. Analysis: Template analysis and macro extraction 2. Compilation: Macro resolution and context compilation 3. Processing: LLM execution and output generation - InputBundleHash for deterministic idempotency (FR-4.3) - RunManifest for complete execution provenance (FR-5) - LLMAdapter interface for pluggable model providers - MockLLMAdapter for testing without API calls - PromptExecutionEngine orchestrating full lifecycle Idempotent Execution (FR-4.4): - Calculate SHA-256 hash of complete input context - Skip execution if identical hash exists - Cache successful runs by hash - Support force re-execution via config flag RunManifest Tracking (FR-5.2): - Template metadata (id, name, digest) - Resolved input artifacts and digests - Compiled prompt digest - Model configuration - Output artifacts - Dependency edges for graph construction - Timing metadata for performance analysis Tests (27 passing): - 17 execution model tests (config, bundle, runs, stages) - 10 engine tests (execution, idempotency, errors, caching) Implements: - FR-4.1: Three-stage execution lifecycle - FR-4.2: CompiledPrompt during compilation - FR-4.3: InputBundleHash calculation - FR-4.4: Skip execution for identical hashes - FR-5.1: RunManifest persistence - FR-5.2: Complete manifest contents - FR-5.3: Nested run linking (foundation) Files Created: - markitect/prompts/execution/models.py - markitect/prompts/execution/manifest.py - markitect/prompts/execution/llm_adapter.py - markitect/prompts/execution/engine.py - migrations/prompts/003_create_runs_and_manifests.sql - tests/unit/prompts/test_execution_models.py - tests/unit/prompts/test_execution_engine.py Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
268
markitect/prompts/execution/engine.py
Normal file
268
markitect/prompts/execution/engine.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""
|
||||
Prompt execution engine.
|
||||
|
||||
Implements FR-4: PromptRun Lifecycle
|
||||
Three-stage execution with idempotent runs.
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Optional, Dict
|
||||
|
||||
from markitect.prompts.templates.models import PromptTemplate
|
||||
from markitect.prompts.templates.analyzer import TemplateAnalyzer
|
||||
from markitect.prompts.resolver.resolver import PromptResolver
|
||||
from markitect.prompts.resolver.compiler import ContextCompiler
|
||||
from markitect.prompts.resolver.strategy import ResolutionConfig
|
||||
from markitect.prompts.execution.models import (
|
||||
PromptRun,
|
||||
ExecutionStage,
|
||||
RunConfig,
|
||||
RunStatus,
|
||||
InputBundle,
|
||||
)
|
||||
from markitect.prompts.execution.manifest import RunManifest
|
||||
from markitect.prompts.execution.llm_adapter import LLMAdapter
|
||||
from markitect.prompts.models import calculate_content_digest, ArtifactType
|
||||
from markitect.prompts.services.artifact_service import ArtifactService
|
||||
|
||||
|
||||
class PromptExecutionEngine:
    """
    Engine for executing prompt templates.

    Implements FR-4: PromptRun Lifecycle
    - Stage 1: Template Analysis
    - Stage 2: Context Compilation
    - Stage 3: Prompt Processing

    Implements FR-4.4: Idempotent execution via InputBundleHash

    Completed runs are kept in ``run_cache``, an in-memory dict keyed by
    input bundle hash that belongs to this engine instance.
    """

    def __init__(
        self,
        artifact_service: ArtifactService,
        template_analyzer: TemplateAnalyzer,
        resolver: PromptResolver,
        compiler: ContextCompiler,
        llm_adapter: LLMAdapter,
    ):
        """
        Initialize execution engine.

        Args:
            artifact_service: For artifact operations (stores LLM output)
            template_analyzer: For template analysis
            resolver: For macro resolution
            compiler: For context compilation
            llm_adapter: For LLM execution
        """
        self.artifact_service = artifact_service
        self.template_analyzer = template_analyzer
        self.resolver = resolver
        self.compiler = compiler
        self.llm_adapter = llm_adapter
        self.run_cache: Dict[str, PromptRun] = {}  # Cache by input_bundle_hash

    def execute(
        self,
        template: PromptTemplate,
        template_content: str,
        resolution_config: ResolutionConfig,
        run_config: Optional[RunConfig] = None,
    ) -> PromptRun:
        """
        Execute a prompt template.

        Implements FR-4.1: Three-stage execution
        1. Analysis: Analyze template and extract macros
        2. Compilation: Resolve macros and compile prompt
        3. Processing: Execute with LLM

        Implements FR-4.4: Skip if identical InputBundleHash exists

        Exceptions raised during stage 3 (LLM call, output artifact
        creation, manifest updates) are caught and reported as a failed
        run. Exceptions raised by analysis, resolution or compilation are
        not caught here and propagate to the caller.

        Args:
            template: Template to execute
            template_content: Template content
            resolution_config: Resolution configuration
            run_config: Execution configuration; a default ``RunConfig()``
                is used when omitted

        Returns:
            PromptRun with execution results. The run is marked failed when
            resolution or processing fails, skipped when a cached run with
            the same input bundle hash exists and ``skip_if_exists`` is set,
            and complete otherwise.
        """
        config = run_config or RunConfig()

        # Stage durations are measured with perf_counter(): unlike
        # time.time() it is not affected by system clock adjustments, so
        # recorded timings can never be negative or skewed by a clock jump.

        # Stage 1: Template Analysis
        stage_started = time.perf_counter()
        if not template.analyzed:
            self.template_analyzer.analyze(template, template_content)
        analysis_time = time.perf_counter() - stage_started

        # Stage 2: Context Compilation
        stage_started = time.perf_counter()
        resolution_result = self.resolver.resolve_template(template, resolution_config)

        if not resolution_result.success:
            # Resolution failed - create failed run
            return self._create_failed_run(
                template,
                "Resolution failed: " + ", ".join(resolution_result.context.errors),
                config,
            )

        compiled = self.compiler.compile(template, template_content, resolution_result)
        compilation_time = time.perf_counter() - stage_started

        # Calculate InputBundleHash (FR-4.3)
        input_bundle = InputBundle(
            template_digest=template.content_digest,
            dependency_digests=compiled.dependency_digests,
            resolution_config_hash=calculate_content_digest(
                str(resolution_config.to_dict())
            ),
            model_config={
                "model_name": config.model_name,
                "temperature": config.temperature,
                "max_tokens": config.max_tokens,
            },
        )
        input_bundle_hash = input_bundle.calculate_hash()

        # Check for existing run (FR-4.4)
        if config.skip_if_exists and input_bundle_hash in self.run_cache:
            existing_run = self.run_cache[input_bundle_hash]
            # Create skipped run referencing existing
            skipped_run = PromptRun.create(
                template_id=template.id,
                input_bundle_hash=input_bundle_hash,
                config=config,
            )
            skipped_run.mark_skipped()
            skipped_run.metadata["skipped_due_to"] = existing_run.id
            return skipped_run

        # Create run
        run = PromptRun.create(
            template_id=template.id,
            input_bundle_hash=input_bundle_hash,
            config=config,
        )

        # Create manifest
        manifest = RunManifest.create(
            run_id=run.id,
            template_id=template.id,
            template_name=template.name,
            template_digest=template.content_digest,
        )
        manifest.compiled_prompt_digest = compiled.content_digest
        manifest.model_config = config.to_dict()

        # Add resolved inputs to manifest; macros without an artifact are
        # skipped.
        for resolved in resolution_result.context.resolved_macros:
            if resolved.artifact:
                manifest.add_resolved_input(
                    name=resolved.artifact.name,
                    artifact_id=resolved.artifact.id,
                    space_id=resolved.space_id or "",
                    digest=resolved.artifact.content_digest,
                )
                # Add dependency edge
                manifest.add_dependency_edge(
                    source_id=resolved.artifact.id,
                    target_id=run.id,
                    edge_type="requires",
                )

        # Record timing
        manifest.set_timing("analysis", analysis_time)
        manifest.set_timing("compilation", compilation_time)

        # Stage 3: Prompt Processing
        run.advance_stage(ExecutionStage.PROCESSING)

        try:
            stage_started = time.perf_counter()
            llm_response = self.llm_adapter.execute_prompt(
                compiled.content,
                config,
            )
            processing_time = time.perf_counter() - stage_started
            manifest.set_timing("processing", processing_time)

            # Store output as generated artifact
            output_artifact = self.artifact_service.create_artifact(
                space_id=template.space_id,
                name=f"{template.name}-output-{run.id[:8]}",
                content=llm_response.content,
                artifact_type=ArtifactType.GENERATED,
            )

            manifest.add_output_artifact(
                artifact_id=output_artifact.id,
                name=output_artifact.name,
                digest=output_artifact.content_digest,
                artifact_type=output_artifact.artifact_type.value,
            )

            # Add generation edge
            manifest.add_dependency_edge(
                source_id=run.id,
                target_id=output_artifact.id,
                edge_type="generates",
            )

            # Mark complete
            run.mark_complete()
            run.metadata["manifest"] = manifest.to_dict()
            run.metadata["output_artifact_id"] = output_artifact.id

            # Cache run; only runs that reach this point are cached, so a
            # failed run never causes a later execution to be skipped.
            self.run_cache[input_bundle_hash] = run

        except Exception as e:
            # Deliberate boundary: any processing-stage error is converted
            # into a failed run (with the partial manifest attached)
            # instead of propagating to the caller.
            run.mark_failed(str(e))
            run.metadata["manifest"] = manifest.to_dict()

        return run

    def _create_failed_run(
        self,
        template: PromptTemplate,
        error: str,
        config: RunConfig,
    ) -> PromptRun:
        """
        Create a failed run.

        The run is created with the literal placeholder ``"failed"`` as its
        input bundle hash; ``execute`` calls this before a real hash has
        been calculated.

        Args:
            template: Template whose id is recorded on the run
            error: Error message passed to ``mark_failed``
            config: Execution configuration for the run

        Returns:
            Failed PromptRun
        """
        run = PromptRun.create(
            template_id=template.id,
            input_bundle_hash="failed",
            config=config,
        )
        run.mark_failed(error)
        return run

    def get_run_by_hash(self, input_bundle_hash: str) -> Optional[PromptRun]:
        """
        Retrieve cached run by input bundle hash.

        Args:
            input_bundle_hash: Hash to lookup

        Returns:
            PromptRun if found, None otherwise
        """
        return self.run_cache.get(input_bundle_hash)

    def clear_cache(self) -> None:
        """Clear the run cache."""
        self.run_cache.clear()
|
||||
Reference in New Issue
Block a user