Files
markitect-main/markitect/prompts/execution/models.py
tegwick c56c92c815
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(prompts): implement Phase 4 - Execution Engine (FR-4, FR-5)
Implement three-stage execution lifecycle with idempotent runs and complete
provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 23:15:33 +01:00

304 lines
9.4 KiB
Python

"""
Models for prompt execution.
Implements FR-4: PromptRun Lifecycle
Defines execution stages, run configurations, and input bundles.
"""
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Dict, Any, List, Optional

from markitect.prompts.models import calculate_bundle_digest
class ExecutionStage(Enum):
    """
    Lifecycle stage of a single prompt execution.

    Implements FR-4.1: PromptRun execution stages
    """

    # Not started
    PENDING = "pending"
    # Template analysis
    ANALYSIS = "analysis"
    # Context compilation
    COMPILATION = "compilation"
    # LLM execution
    PROCESSING = "processing"
    # Successfully finished
    COMPLETE = "complete"
    # Execution failed
    FAILED = "failed"
class RunStatus(Enum):
    """Coarse-grained outcome state of a run."""

    PENDING = "pending"
    RUNNING = "running"
    SUCCESS = "success"
    FAILED = "failed"
    # Skipped due to identical InputBundleHash
    SKIPPED = "skipped"
@dataclass
class RunConfig:
    """
    Settings that control one prompt execution.

    Attributes:
        model_name: LLM model to use
        temperature: Model temperature (0.0-1.0)
        max_tokens: Maximum tokens to generate
        model_params: Additional model parameters
        max_depth: Maximum generation depth for nested runs
        skip_if_exists: Skip if identical InputBundleHash exists (FR-4.4)
        timeout_seconds: Execution timeout
    """

    model_name: str = "gpt-4"
    temperature: float = 0.7
    max_tokens: int = 2000
    model_params: Dict[str, Any] = field(default_factory=dict)
    max_depth: int = 3
    skip_if_exists: bool = True
    timeout_seconds: int = 300

    def to_dict(self) -> Dict[str, Any]:
        """Return the configuration as a plain dictionary."""
        serialized: Dict[str, Any] = {}
        # Key order matches the field declaration order above.
        for attr in (
            "model_name",
            "temperature",
            "max_tokens",
            "model_params",
            "max_depth",
            "skip_if_exists",
            "timeout_seconds",
        ):
            serialized[attr] = getattr(self, attr)
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RunConfig":
        """Build a configuration from a dictionary, defaulting missing keys."""
        # (key, fallback) pairs; the tuple is rebuilt on every call so the
        # empty-dict fallback is never shared between instances.
        fallbacks = (
            ("model_name", "gpt-4"),
            ("temperature", 0.7),
            ("max_tokens", 2000),
            ("model_params", {}),
            ("max_depth", 3),
            ("skip_if_exists", True),
            ("timeout_seconds", 300),
        )
        kwargs = {key: data.get(key, fallback) for key, fallback in fallbacks}
        return cls(**kwargs)
@dataclass
class InputBundle:
    """
    Complete input context for execution.

    Implements FR-4.3: InputBundleHash calculation

    The InputBundle captures all inputs that affect execution output,
    enabling idempotent execution through content-based hashing.

    Attributes:
        template_digest: SHA-256 digest of template content
        dependency_digests: Map of dependency name -> digest
        resolution_config_hash: Hash of resolution configuration
        model_config: Model configuration
        compilation_options: Compilation settings
    """

    template_digest: str
    dependency_digests: Dict[str, str]
    resolution_config_hash: str
    model_config: Dict[str, Any]
    compilation_options: Dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def _canonical_repr(value: Any) -> str:
        """
        Render a value like ``repr`` but with dict keys in sorted order.

        Dicts and lists are walked recursively; everything else falls back
        to plain ``repr``. For a dict whose keys are already in sorted order
        the result equals ``repr(value)``.
        """
        if isinstance(value, dict):
            rendered = sorted(
                (repr(key), InputBundle._canonical_repr(item))
                for key, item in value.items()
            )
            return "{" + ", ".join(f"{key}: {item}" for key, item in rendered) + "}"
        if isinstance(value, list):
            inner = ", ".join(InputBundle._canonical_repr(item) for item in value)
            return "[" + inner + "]"
        return repr(value)

    @staticmethod
    def _join_sorted(mapping: Dict[str, Any]) -> str:
        """
        Serialize a mapping as ``key=value`` pairs joined by ``:``.

        Pairs are ordered by key. Dict and list values are rendered with
        sorted keys so that the insertion order of nested mappings (for
        example a ``model_params`` dict inside ``model_config``) cannot
        change the result. Scalar values are formatted exactly as
        ``f"{value}"``.

        NOTE(review): keys and values are not escaped, so entries that
        themselves contain ``:`` or ``=`` could be ambiguous.
        """
        parts = []
        for key, value in sorted(mapping.items()):
            if isinstance(value, (dict, list)):
                text = InputBundle._canonical_repr(value)
            else:
                text = f"{value}"
            parts.append(f"{key}={text}")
        return ":".join(parts)

    def calculate_hash(self) -> str:
        """
        Calculate deterministic hash of input bundle.

        Implements FR-4.3: InputBundleHash calculation

        Components (sorted for determinism):
            1. Template content digest
            2. Sorted dependency digests by name
            3. Resolution configuration hash
            4. Model settings (name, temperature, etc.)
            5. Compilation options

        Returns:
            The value returned by ``calculate_bundle_digest`` for the
            serialized components.
        """
        components = {
            "template": self.template_digest,
            "dependencies": self._join_sorted(self.dependency_digests),
            "resolution_config": self.resolution_config_hash,
            "model": self._join_sorted(self.model_config),
            "compilation": self._join_sorted(self.compilation_options),
        }
        return calculate_bundle_digest(components)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary, including the computed input bundle hash."""
        return {
            "template_digest": self.template_digest,
            "dependency_digests": self.dependency_digests,
            "resolution_config_hash": self.resolution_config_hash,
            "model_config": self.model_config,
            "compilation_options": self.compilation_options,
            "input_bundle_hash": self.calculate_hash(),
        }
@dataclass
class LLMResponse:
    """
    Result returned by an LLM call.

    Attributes:
        content: Generated content
        model: Model used
        usage: Token usage statistics
        finish_reason: Why generation stopped
        metadata: Additional response metadata
    """

    content: str
    model: str
    usage: Dict[str, int] = field(default_factory=dict)
    finish_reason: str = "stop"
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the response as a plain dictionary."""
        # Attribute names double as the output keys, in declaration order.
        exported = ("content", "model", "usage", "finish_reason", "metadata")
        return {attr: getattr(self, attr) for attr in exported}
def _utc_now() -> datetime:
    """
    Return the current UTC time as a naive ``datetime``.

    ``datetime.utcnow()`` is deprecated since Python 3.12. This helper
    produces the same naive-UTC value through the supported API, so
    timestamps and their ``isoformat()`` output keep the same shape.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass
class PromptRun:
    """
    Record of a prompt template execution.

    Implements FR-4: PromptRun Lifecycle

    Tracks complete execution state through all stages:
    Analysis → Compilation → Processing → Complete/Failed

    Attributes:
        id: Unique run identifier
        template_id: ID of template being executed
        input_bundle_hash: Hash of input bundle for idempotency
        status: Overall run status
        stage: Current execution stage
        parent_run_id: Parent run ID (for nested generators)
        depth: Nesting depth (0 for top-level)
        config: Execution configuration
        started_at: Execution start time (naive UTC)
        completed_at: Execution completion time (naive UTC)
        error_message: Error message if failed
        metadata: Additional run metadata
    """

    id: str
    template_id: str
    input_bundle_hash: str
    status: RunStatus = RunStatus.PENDING
    stage: ExecutionStage = ExecutionStage.PENDING
    parent_run_id: Optional[str] = None
    depth: int = 0
    config: RunConfig = field(default_factory=RunConfig)
    started_at: datetime = field(default_factory=_utc_now)
    completed_at: Optional[datetime] = None
    error_message: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def create(
        cls,
        template_id: str,
        input_bundle_hash: str,
        config: Optional[RunConfig] = None,
        parent_run_id: Optional[str] = None,
        depth: int = 0,
    ) -> "PromptRun":
        """
        Create a new run with a freshly generated UUID4 identifier.

        Args:
            template_id: Template being executed
            input_bundle_hash: Hash of input bundle
            config: Execution configuration; a default RunConfig is used
                when omitted
            parent_run_id: Parent run ID for nested execution
            depth: Nesting depth

        Returns:
            New PromptRun instance
        """
        return cls(
            id=str(uuid.uuid4()),
            template_id=template_id,
            input_bundle_hash=input_bundle_hash,
            config=config or RunConfig(),
            parent_run_id=parent_run_id,
            depth=depth,
        )

    def advance_stage(self, stage: ExecutionStage) -> None:
        """
        Advance to next execution stage.

        Only entering PROCESSING changes ``status`` (to RUNNING); every
        other stage leaves ``status`` untouched.

        NOTE(review): a run in ANALYSIS or COMPILATION therefore still
        reports PENDING status -- confirm this is intended.

        Args:
            stage: New stage
        """
        self.stage = stage
        if stage == ExecutionStage.PROCESSING:
            self.status = RunStatus.RUNNING

    def mark_complete(self) -> None:
        """Mark run as successfully completed and stamp the completion time."""
        self.stage = ExecutionStage.COMPLETE
        self.status = RunStatus.SUCCESS
        self.completed_at = _utc_now()

    def mark_failed(self, error: str) -> None:
        """
        Mark run as failed and stamp the completion time.

        Args:
            error: Error message
        """
        self.stage = ExecutionStage.FAILED
        self.status = RunStatus.FAILED
        self.error_message = error
        self.completed_at = _utc_now()

    def mark_skipped(self) -> None:
        """
        Mark run as skipped (identical hash exists).

        The stage is left unchanged; only status and completion time are set.
        """
        self.status = RunStatus.SKIPPED
        self.completed_at = _utc_now()

    def is_complete(self) -> bool:
        """Check if run reached a terminal status (success, failed, skipped)."""
        return self.status in (RunStatus.SUCCESS, RunStatus.FAILED, RunStatus.SKIPPED)

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary.

        Enums are emitted as their string values and datetimes as ISO-8601
        strings. ``metadata`` is not part of the output.

        NOTE(review): confirm the omission of ``metadata`` is deliberate.
        """
        return {
            "id": self.id,
            "template_id": self.template_id,
            "input_bundle_hash": self.input_bundle_hash,
            "status": self.status.value,
            "stage": self.stage.value,
            "parent_run_id": self.parent_run_id,
            "depth": self.depth,
            "config": self.config.to_dict(),
            "started_at": self.started_at.isoformat(),
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "error_message": self.error_message,
        }