Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Implement three-stage execution lifecycle with idempotent runs and complete provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
304 lines
9.4 KiB
Python
304 lines
9.4 KiB
Python
"""
Models for prompt execution.

Implements FR-4: PromptRun Lifecycle
Defines execution stages, run configurations, and input bundles.
"""
|
|
|
|
import uuid
from dataclasses import dataclass, field, fields
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional

from markitect.prompts.models import calculate_bundle_digest
|
|
|
|
|
|
class ExecutionStage(Enum):
    """
    Stages a prompt run passes through while it executes.

    Implements FR-4.1: PromptRun execution stages
    """

    # Nothing has happened yet.
    PENDING = "pending"
    # Template analysis.
    ANALYSIS = "analysis"
    # Context compilation.
    COMPILATION = "compilation"
    # LLM execution.
    PROCESSING = "processing"
    # Terminal stage: the run finished successfully.
    COMPLETE = "complete"
    # Terminal stage: the run failed.
    FAILED = "failed"
|
|
|
|
|
|
class RunStatus(Enum):
    """Coarse-grained outcome/state of a run, independent of its stage."""

    PENDING = "pending"
    RUNNING = "running"
    SUCCESS = "success"
    FAILED = "failed"
    # The run was not executed because an identical InputBundleHash exists.
    SKIPPED = "skipped"
|
|
|
|
|
|
@dataclass
class RunConfig:
    """
    Configuration for prompt execution.

    Attributes:
        model_name: LLM model to use
        temperature: Model temperature (0.0-1.0)
        max_tokens: Maximum tokens to generate
        model_params: Additional model parameters
        max_depth: Maximum generation depth for nested runs
        skip_if_exists: Skip if identical InputBundleHash exists (FR-4.4)
        timeout_seconds: Execution timeout
    """

    model_name: str = "gpt-4"
    temperature: float = 0.7
    max_tokens: int = 2000
    model_params: Dict[str, Any] = field(default_factory=dict)
    max_depth: int = 3
    skip_if_exists: bool = True
    timeout_seconds: int = 300

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary.

        Returns:
            A new dict with one entry per field. ``model_params`` is a
            shallow copy, so editing the returned dict does not change this
            config (a ``None`` value is emitted as an empty dict).
        """
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            # Copy rather than expose the live dict held by this instance.
            "model_params": dict(self.model_params or {}),
            "max_depth": self.max_depth,
            "skip_if_exists": self.skip_if_exists,
            "timeout_seconds": self.timeout_seconds,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RunConfig":
        """
        Create from dictionary.

        Args:
            data: Mapping of field names to values. Keys that are not
                fields of this class are ignored; missing keys fall back to
                the dataclass field defaults.

        Returns:
            New RunConfig instance. ``model_params`` is shallow-copied so the
            instance does not share a dict with ``data``.
        """
        # Only pass through keys that are present, so the field defaults
        # declared on the class remain the single source of truth.
        kwargs = {f.name: data[f.name] for f in fields(cls) if f.name in data}
        if "model_params" in kwargs:
            kwargs["model_params"] = dict(kwargs["model_params"] or {})
        return cls(**kwargs)
|
|
|
|
|
|
@dataclass
class InputBundle:
    """
    Complete input context for execution.

    Implements FR-4.3: InputBundleHash calculation

    The InputBundle captures all inputs that affect execution output,
    enabling idempotent execution through content-based hashing.

    Attributes:
        template_digest: SHA-256 digest of template content
        dependency_digests: Map of dependency name -> digest
        resolution_config_hash: Hash of resolution configuration
        model_config: Model configuration
        compilation_options: Compilation settings
    """

    template_digest: str
    dependency_digests: Dict[str, str]
    resolution_config_hash: str
    model_config: Dict[str, Any]
    compilation_options: Dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def _stable_repr(value: Any) -> str:
        """
        Render ``value`` like ``repr`` but with dict/set contents sorted.

        For dicts whose keys are already in sorted order, and for lists and
        tuples, the output matches the built-in ``str``/``repr`` rendering,
        so such values encode exactly as they did before.
        """
        if isinstance(value, dict):
            pairs = sorted(
                (repr(key), InputBundle._stable_repr(item))
                for key, item in value.items()
            )
            return "{" + ", ".join(f"{k}: {v}" for k, v in pairs) + "}"
        if isinstance(value, (set, frozenset)):
            members = sorted(InputBundle._stable_repr(item) for item in value)
            return "set(" + ", ".join(members) + ")"
        if isinstance(value, list):
            return "[" + ", ".join(map(InputBundle._stable_repr, value)) + "]"
        if isinstance(value, tuple):
            inner = ", ".join(map(InputBundle._stable_repr, value))
            # Mirror Python's one-element tuple spelling: "(x,)".
            return f"({inner},)" if len(value) == 1 else f"({inner})"
        return repr(value)

    @staticmethod
    def _encode_mapping(mapping: Dict[str, Any]) -> str:
        """
        Encode a mapping as ``key=value`` pairs sorted by key, joined by ":".

        Scalar values are formatted exactly as ``f"{value}"``. Container
        values (dict, list, tuple, set) go through ``_stable_repr`` so that
        two equal mappings produce the same text regardless of the insertion
        order of any nested dict or the iteration order of any set.

        NOTE(review): ":" and "=" inside keys or values are not escaped, so
        distinct mappings can in principle encode to the same text. Left
        as-is here to keep existing hashes of flat bundles unchanged.
        """
        parts = []
        for key, value in sorted(mapping.items()):
            if isinstance(value, (dict, list, tuple, set, frozenset)):
                rendered = InputBundle._stable_repr(value)
            else:
                rendered = f"{value}"
            parts.append(f"{key}={rendered}")
        return ":".join(parts)

    def calculate_hash(self) -> str:
        """
        Calculate deterministic hash of input bundle.

        Implements FR-4.3: InputBundleHash calculation

        Components (sorted for determinism):
            1. Template content digest
            2. Sorted dependency digests by name
            3. Resolution configuration hash
            4. Model settings (name, temperature, etc.)
            5. Compilation options

        Returns:
            The digest produced by ``calculate_bundle_digest`` for the
            encoded components.
        """
        components = {
            "template": self.template_digest,
            "dependencies": self._encode_mapping(self.dependency_digests),
            "resolution_config": self.resolution_config_hash,
            "model": self._encode_mapping(self.model_config),
            "compilation": self._encode_mapping(self.compilation_options),
        }
        return calculate_bundle_digest(components)

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary.

        Returns:
            Dict holding every field plus ``input_bundle_hash``, which is
            computed by ``calculate_hash()`` at call time.
        """
        return {
            "template_digest": self.template_digest,
            "dependency_digests": self.dependency_digests,
            "resolution_config_hash": self.resolution_config_hash,
            "model_config": self.model_config,
            "compilation_options": self.compilation_options,
            "input_bundle_hash": self.calculate_hash(),
        }
|
|
|
|
|
|
@dataclass
class LLMResponse:
    """
    Result returned by an LLM call.

    Attributes:
        content: Generated content
        model: Model used
        usage: Token usage statistics
        finish_reason: Why generation stopped
        metadata: Additional response metadata
    """

    content: str
    model: str
    usage: Dict[str, int] = field(default_factory=dict)
    finish_reason: str = "stop"
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the response fields as a plain dict, in declaration order."""
        exported = ("content", "model", "usage", "finish_reason", "metadata")
        return {name: getattr(self, name) for name in exported}
|
|
|
|
|
|
def _utc_now() -> datetime:
    """
    Return the current UTC time as a naive ``datetime``.

    ``datetime.utcnow()`` is deprecated as of Python 3.12. This builds the
    same naive-UTC value through the supported API, so ``isoformat()`` output
    keeps its previous shape (no UTC offset suffix).
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass
class PromptRun:
    """
    Record of a prompt template execution.

    Implements FR-4: PromptRun Lifecycle

    Tracks complete execution state through all stages:
    Analysis → Compilation → Processing → Complete/Failed

    Attributes:
        id: Unique run identifier
        template_id: ID of template being executed
        input_bundle_hash: Hash of input bundle for idempotency
        status: Overall run status
        stage: Current execution stage
        parent_run_id: Parent run ID (for nested generators)
        depth: Nesting depth (0 for top-level)
        config: Execution configuration
        started_at: Execution start time (naive datetime in UTC)
        completed_at: Execution completion time (naive datetime in UTC)
        error_message: Error message if failed
        metadata: Additional run metadata
    """

    id: str
    template_id: str
    input_bundle_hash: str
    status: RunStatus = RunStatus.PENDING
    stage: ExecutionStage = ExecutionStage.PENDING
    parent_run_id: Optional[str] = None
    depth: int = 0
    config: RunConfig = field(default_factory=RunConfig)
    started_at: datetime = field(default_factory=_utc_now)
    completed_at: Optional[datetime] = None
    error_message: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def create(
        cls,
        template_id: str,
        input_bundle_hash: str,
        config: Optional[RunConfig] = None,
        parent_run_id: Optional[str] = None,
        depth: int = 0,
    ) -> "PromptRun":
        """
        Create a new run.

        The run receives a freshly generated UUID4 string as its id; all
        fields not listed below keep their dataclass defaults.

        Args:
            template_id: Template being executed
            input_bundle_hash: Hash of input bundle
            config: Execution configuration (a default RunConfig is used
                when omitted)
            parent_run_id: Parent run ID for nested execution
            depth: Nesting depth

        Returns:
            New PromptRun instance
        """
        return cls(
            id=str(uuid.uuid4()),
            template_id=template_id,
            input_bundle_hash=input_bundle_hash,
            config=config or RunConfig(),
            parent_run_id=parent_run_id,
            depth=depth,
        )

    def advance_stage(self, stage: ExecutionStage) -> None:
        """
        Advance to next execution stage.

        Entering PROCESSING also sets the status to RUNNING; every other
        stage leaves the status as it was.

        Args:
            stage: New stage
        """
        self.stage = stage
        # NOTE(review): ANALYSIS and COMPILATION do not change the status,
        # so a run in those stages still reports its prior status — confirm
        # this is intended.
        if stage == ExecutionStage.PROCESSING:
            self.status = RunStatus.RUNNING

    def mark_complete(self) -> None:
        """Mark run as successfully completed and record the finish time."""
        self.stage = ExecutionStage.COMPLETE
        self.status = RunStatus.SUCCESS
        self.completed_at = _utc_now()

    def mark_failed(self, error: str) -> None:
        """
        Mark run as failed and record the finish time.

        Args:
            error: Error message
        """
        self.stage = ExecutionStage.FAILED
        self.status = RunStatus.FAILED
        self.error_message = error
        self.completed_at = _utc_now()

    def mark_skipped(self) -> None:
        """
        Mark run as skipped (identical hash exists).

        Only the status and completion time are updated; the stage is left
        unchanged.
        """
        self.status = RunStatus.SKIPPED
        self.completed_at = _utc_now()

    def is_complete(self) -> bool:
        """Return True once the status is SUCCESS, FAILED, or SKIPPED."""
        return self.status in (RunStatus.SUCCESS, RunStatus.FAILED, RunStatus.SKIPPED)

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary.

        Enum fields are emitted as their string values, timestamps as ISO
        8601 strings (``completed_at`` is None while unset), and ``config``
        via ``RunConfig.to_dict()``. The ``metadata`` field is not included.
        """
        return {
            "id": self.id,
            "template_id": self.template_id,
            "input_bundle_hash": self.input_bundle_hash,
            "status": self.status.value,
            "stage": self.stage.value,
            "parent_run_id": self.parent_run_id,
            "depth": self.depth,
            "config": self.config.to_dict(),
            "started_at": self.started_at.isoformat(),
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "error_message": self.error_message,
        }
|