Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Implement three-stage execution lifecycle with idempotent runs and complete provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
241 lines
7.4 KiB
Python
241 lines
7.4 KiB
Python
"""Unit tests for execution models."""
|
|
|
|
import pytest
|
|
from markitect.prompts.execution.models import (
|
|
RunConfig,
|
|
InputBundle,
|
|
LLMResponse,
|
|
PromptRun,
|
|
ExecutionStage,
|
|
RunStatus,
|
|
)
|
|
|
|
|
|
class TestRunConfig:
    """Checks RunConfig defaults, explicit overrides, and dict conversion."""

    def test_create_default_config(self):
        """A RunConfig built with no arguments exposes the expected defaults."""
        defaults = RunConfig()

        assert defaults.model_name == "gpt-4"
        assert defaults.temperature == 0.7
        assert defaults.max_tokens == 2000
        assert defaults.max_depth == 3
        assert defaults.skip_if_exists is True

    def test_create_custom_config(self):
        """Every keyword passed to the constructor is stored unchanged."""
        overrides = {
            "model_name": "gpt-3.5-turbo",
            "temperature": 0.5,
            "max_tokens": 1000,
            "max_depth": 5,
            "skip_if_exists": False,
        }
        custom = RunConfig(**overrides)

        assert custom.model_name == "gpt-3.5-turbo"
        assert custom.temperature == 0.5
        assert custom.max_tokens == 1000
        assert custom.max_depth == 5
        assert custom.skip_if_exists is False

    def test_config_to_dict(self):
        """to_dict() reports the model name and includes a temperature key."""
        serialized = RunConfig(model_name="test-model").to_dict()

        assert serialized["model_name"] == "test-model"
        assert "temperature" in serialized

    def test_config_from_dict(self):
        """from_dict() restores the fields supplied in the mapping."""
        restored = RunConfig.from_dict(
            {
                "model_name": "custom-model",
                "temperature": 0.9,
                "max_tokens": 500,
            }
        )

        assert restored.model_name == "custom-model"
        assert restored.temperature == 0.9
        assert restored.max_tokens == 500
|
|
|
|
|
|
class TestInputBundle:
    """Checks InputBundle construction, hashing, and dict conversion."""

    def test_create_input_bundle(self):
        """Constructor arguments are readable back from the bundle."""
        digests = {"dep1": "def456", "dep2": "ghi789"}
        model_settings = {"model": "gpt-4", "temp": 0.7}

        created = InputBundle(
            template_digest="abc123",
            dependency_digests=digests,
            resolution_config_hash="config123",
            model_config=model_settings,
        )

        assert created.template_digest == "abc123"
        assert len(created.dependency_digests) == 2

    def test_calculate_hash_deterministic(self):
        """Two bundles differing only in dict key order hash identically."""
        in_order = {"a": "1", "b": "2"}
        # Same entries, inserted in the opposite order on purpose.
        swapped = {"b": "2", "a": "1"}

        first = InputBundle(
            template_digest="abc",
            dependency_digests=in_order,
            resolution_config_hash="conf",
            model_config={"model": "gpt-4"},
        )
        second = InputBundle(
            template_digest="abc",
            dependency_digests=swapped,
            resolution_config_hash="conf",
            model_config={"model": "gpt-4"},
        )

        # Key insertion order must not leak into the hash.
        assert first.calculate_hash() == second.calculate_hash()

    def test_calculate_hash_changes_with_content(self):
        """Changing only the template digest yields a different hash."""

        def bundle_for(digest):
            # Everything except the template digest is held constant.
            return InputBundle(
                template_digest=digest,
                dependency_digests={},
                resolution_config_hash="conf",
                model_config={},
            )

        original, altered = bundle_for("abc"), bundle_for("xyz")

        assert original.calculate_hash() != altered.calculate_hash()

    def test_to_dict_includes_hash(self):
        """to_dict() carries the same value calculate_hash() returns."""
        subject = InputBundle(
            template_digest="abc",
            dependency_digests={},
            resolution_config_hash="conf",
            model_config={},
        )
        serialized = subject.to_dict()

        assert "input_bundle_hash" in serialized
        assert serialized["input_bundle_hash"] == subject.calculate_hash()
|
|
|
|
|
|
class TestLLMResponse:
    """Checks LLMResponse construction and dict conversion."""

    def test_create_response(self):
        """Content, model, and usage passed in are readable back."""
        token_usage = {"total_tokens": 100}

        reply = LLMResponse(
            content="Generated text",
            model="gpt-4",
            usage=token_usage,
            finish_reason="stop",
        )

        assert reply.content == "Generated text"
        assert reply.model == "gpt-4"
        assert reply.usage["total_tokens"] == 100

    def test_response_to_dict(self):
        """to_dict() exposes the content and model fields."""
        serialized = LLMResponse(content="Text", model="gpt-4").to_dict()

        assert serialized["content"] == "Text"
        assert serialized["model"] == "gpt-4"
|
|
|
|
|
|
class TestPromptRun:
    """Checks PromptRun creation, stage transitions, and serialization."""

    @staticmethod
    def _new_run():
        """Build the plain top-level run shared by the lifecycle tests."""
        return PromptRun.create(
            template_id="template-1",
            input_bundle_hash="hash",
        )

    def test_create_run(self):
        """A new run has an id, echoes its inputs, and starts pending."""
        created = PromptRun.create(
            template_id="template-1",
            input_bundle_hash="hash123",
        )

        assert created.id  # Has UUID
        assert created.template_id == "template-1"
        assert created.input_bundle_hash == "hash123"
        assert created.status == RunStatus.PENDING
        assert created.stage == ExecutionStage.PENDING
        assert created.depth == 0

    def test_create_nested_run(self):
        """Parent id and depth given at creation are stored on the run."""
        nested_args = {
            "template_id": "template-1",
            "input_bundle_hash": "hash",
            "parent_run_id": "parent-123",
            "depth": 2,
        }
        child = PromptRun.create(**nested_args)

        assert child.parent_run_id == "parent-123"
        assert child.depth == 2

    def test_advance_stage(self):
        """advance_stage() updates the stage; status ends up RUNNING."""
        subject = self._new_run()
        assert subject.stage == ExecutionStage.PENDING

        # Walk the same two transitions in order, checking after each one.
        for target in (ExecutionStage.ANALYSIS, ExecutionStage.PROCESSING):
            subject.advance_stage(target)
            assert subject.stage == target

        assert subject.status == RunStatus.RUNNING

    def test_mark_complete(self):
        """mark_complete() sets success status, final stage, and timestamp."""
        subject = self._new_run()
        subject.mark_complete()

        assert subject.status == RunStatus.SUCCESS
        assert subject.stage == ExecutionStage.COMPLETE
        assert subject.completed_at is not None
        assert subject.is_complete()

    def test_mark_failed(self):
        """mark_failed() records the error and finishes the run as failed."""
        subject = self._new_run()
        subject.mark_failed("Error message")

        assert subject.status == RunStatus.FAILED
        assert subject.stage == ExecutionStage.FAILED
        assert subject.error_message == "Error message"
        assert subject.completed_at is not None
        assert subject.is_complete()

    def test_mark_skipped(self):
        """mark_skipped() finishes the run with the skipped status."""
        subject = self._new_run()
        subject.mark_skipped()

        assert subject.status == RunStatus.SKIPPED
        assert subject.completed_at is not None
        assert subject.is_complete()

    def test_run_to_dict(self):
        """to_dict() exposes id, inputs, and string status/stage values."""
        subject = self._new_run()
        serialized = subject.to_dict()

        assert serialized["id"] == subject.id
        assert serialized["template_id"] == "template-1"
        assert serialized["input_bundle_hash"] == "hash"
        assert serialized["status"] == "pending"
        assert serialized["stage"] == "pending"
|