Files
markitect-main/tests/unit/prompts/test_execution_models.py
tegwick c56c92c815
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(prompts): implement Phase 4 - Execution Engine (FR-4, FR-5)
Implement three-stage execution lifecycle with idempotent runs and complete
provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 23:15:33 +01:00

241 lines
7.4 KiB
Python

"""Unit tests for execution models."""
import pytest
from markitect.prompts.execution.models import (
RunConfig,
InputBundle,
LLMResponse,
PromptRun,
ExecutionStage,
RunStatus,
)
class TestRunConfig:
    """Checks RunConfig construction and its dict conversion helpers."""

    def test_create_default_config(self) -> None:
        """A config built without arguments carries the expected defaults."""
        default_config = RunConfig()

        assert default_config.model_name == "gpt-4"
        assert default_config.temperature == 0.7
        assert default_config.max_tokens == 2000
        assert default_config.max_depth == 3
        assert default_config.skip_if_exists is True

    def test_create_custom_config(self) -> None:
        """Keyword arguments given to the constructor are stored as passed."""
        overrides = {
            "model_name": "gpt-3.5-turbo",
            "temperature": 0.5,
            "max_tokens": 1000,
            "max_depth": 5,
            "skip_if_exists": False,
        }
        custom_config = RunConfig(**overrides)

        assert custom_config.model_name == "gpt-3.5-turbo"
        assert custom_config.temperature == 0.5
        assert custom_config.max_tokens == 1000
        assert custom_config.max_depth == 5
        assert custom_config.skip_if_exists is False

    def test_config_to_dict(self) -> None:
        """to_dict() reports the model name and includes a temperature key."""
        serialized = RunConfig(model_name="test-model").to_dict()

        assert serialized["model_name"] == "test-model"
        assert "temperature" in serialized

    def test_config_from_dict(self) -> None:
        """from_dict() builds a config carrying the supplied values."""
        payload = {
            "model_name": "custom-model",
            "temperature": 0.9,
            "max_tokens": 500,
        }
        restored = RunConfig.from_dict(payload)

        # Compare against literals rather than `payload` so the check does not
        # depend on whether from_dict() leaves its argument untouched.
        assert restored.model_name == "custom-model"
        assert restored.temperature == 0.9
        assert restored.max_tokens == 500
class TestInputBundle:
    """Checks InputBundle construction, hashing and dict conversion."""

    def test_create_input_bundle(self) -> None:
        """Constructor arguments are available as attributes afterwards."""
        dependencies = {"dep1": "def456", "dep2": "ghi789"}
        created = InputBundle(
            template_digest="abc123",
            dependency_digests=dependencies,
            resolution_config_hash="config123",
            model_config={"model": "gpt-4", "temp": 0.7},
        )

        assert created.template_digest == "abc123"
        assert len(created.dependency_digests) == 2

    def test_calculate_hash_deterministic(self) -> None:
        """Two bundles with equal content hash identically."""
        forward = InputBundle(
            template_digest="abc",
            dependency_digests={"a": "1", "b": "2"},
            resolution_config_hash="conf",
            model_config={"model": "gpt-4"},
        )
        # Same entries as above, deliberately inserted in the opposite order.
        backward = InputBundle(
            template_digest="abc",
            dependency_digests={"b": "2", "a": "1"},
            resolution_config_hash="conf",
            model_config={"model": "gpt-4"},
        )

        # Insertion order of the dependency mapping must not affect the hash.
        assert forward.calculate_hash() == backward.calculate_hash()

    def test_calculate_hash_changes_with_content(self) -> None:
        """Changing only the template digest yields a different hash."""

        def bundle_for(template_digest):
            # Fresh empty dicts per bundle so the two instances share no state.
            return InputBundle(
                template_digest=template_digest,
                dependency_digests={},
                resolution_config_hash="conf",
                model_config={},
            )

        baseline = bundle_for("abc")
        altered = bundle_for("xyz")

        assert baseline.calculate_hash() != altered.calculate_hash()

    def test_to_dict_includes_hash(self) -> None:
        """to_dict() exposes the bundle hash under 'input_bundle_hash'."""
        subject = InputBundle(
            template_digest="abc",
            dependency_digests={},
            resolution_config_hash="conf",
            model_config={},
        )
        serialized = subject.to_dict()

        assert "input_bundle_hash" in serialized
        assert serialized["input_bundle_hash"] == subject.calculate_hash()
class TestLLMResponse:
    """Checks LLMResponse construction and dict conversion."""

    def test_create_response(self) -> None:
        """Content, model and usage are readable after construction."""
        token_usage = {"total_tokens": 100}
        reply = LLMResponse(
            content="Generated text",
            model="gpt-4",
            usage=token_usage,
            finish_reason="stop",
        )

        assert reply.content == "Generated text"
        assert reply.model == "gpt-4"
        assert reply.usage["total_tokens"] == 100

    def test_response_to_dict(self) -> None:
        """to_dict() carries the content and model of the response."""
        # Only the two arguments used here are supplied; usage and
        # finish_reason are left out, as in the construction test above.
        serialized = LLMResponse(content="Text", model="gpt-4").to_dict()

        assert serialized["content"] == "Text"
        assert serialized["model"] == "gpt-4"
class TestPromptRun:
    """Checks PromptRun creation, stage transitions and serialization."""

    @staticmethod
    def _new_run(input_bundle_hash="hash", **extra):
        """Create a run for 'template-1'; extra keywords go to create()."""
        return PromptRun.create(
            template_id="template-1",
            input_bundle_hash=input_bundle_hash,
            **extra,
        )

    def test_create_run(self) -> None:
        """A freshly created run is pending at depth zero."""
        fresh = self._new_run(input_bundle_hash="hash123")

        assert fresh.id  # an identifier was assigned (truthy)
        assert fresh.template_id == "template-1"
        assert fresh.input_bundle_hash == "hash123"
        assert fresh.status == RunStatus.PENDING
        assert fresh.stage == ExecutionStage.PENDING
        assert fresh.depth == 0

    def test_create_nested_run(self) -> None:
        """Parent id and depth passed to create() are kept on the run."""
        child = self._new_run(parent_run_id="parent-123", depth=2)

        assert child.parent_run_id == "parent-123"
        assert child.depth == 2

    def test_advance_stage(self) -> None:
        """advance_stage() moves the run to each requested stage in turn."""
        tracked = self._new_run()
        assert tracked.stage == ExecutionStage.PENDING

        for target in (ExecutionStage.ANALYSIS, ExecutionStage.PROCESSING):
            tracked.advance_stage(target)
            assert tracked.stage == target

        # After the advances above the run reports itself as running.
        assert tracked.status == RunStatus.RUNNING

    def test_mark_complete(self) -> None:
        """mark_complete() sets success status, final stage and a timestamp."""
        finished = self._new_run()
        finished.mark_complete()

        assert finished.status == RunStatus.SUCCESS
        assert finished.stage == ExecutionStage.COMPLETE
        assert finished.completed_at is not None
        assert finished.is_complete()

    def test_mark_failed(self) -> None:
        """mark_failed() records the error and finishes the run as failed."""
        broken = self._new_run()
        broken.mark_failed("Error message")

        assert broken.status == RunStatus.FAILED
        assert broken.stage == ExecutionStage.FAILED
        assert broken.error_message == "Error message"
        assert broken.completed_at is not None
        assert broken.is_complete()

    def test_mark_skipped(self) -> None:
        """mark_skipped() finishes the run with skipped status."""
        bypassed = self._new_run()
        bypassed.mark_skipped()

        assert bypassed.status == RunStatus.SKIPPED
        assert bypassed.completed_at is not None
        assert bypassed.is_complete()

    def test_run_to_dict(self) -> None:
        """to_dict() reports ids, hash, and status/stage as plain strings."""
        subject = self._new_run()
        serialized = subject.to_dict()

        # Built after to_dict() so the id is read in the same order as the
        # individual comparisons would read it.
        expected = {
            "id": subject.id,
            "template_id": "template-1",
            "input_bundle_hash": "hash",
            "status": "pending",
            "stage": "pending",
        }
        for key, value in expected.items():
            assert serialized[key] == value