# markitect-main/tests/unit/prompts/test_execution_engine.py

"""Unit tests for PromptExecutionEngine."""

import pytest
import tempfile
from pathlib import Path

from markitect.prompts.templates.models import PromptTemplate
from markitect.prompts.templates.analyzer import TemplateAnalyzer
from markitect.prompts.resolver.resolver import PromptResolver
from markitect.prompts.resolver.compiler import ContextCompiler
from markitect.prompts.resolver.strategy import (
    MultiSpaceResolutionStrategy,
    ResolutionConfig,
)
from markitect.prompts.execution.engine import PromptExecutionEngine
from markitect.prompts.execution.models import RunConfig, RunStatus, ExecutionStage
from markitect.prompts.execution.llm_adapter import MockLLMAdapter, ErrorLLMAdapter
from markitect.prompts.services.artifact_service import ArtifactService
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository


@pytest.fixture
def temp_db():
    """Provide the path to a scratch ``.db`` file and delete it afterwards.

    The file is created with ``delete=False`` and closed straight away, so it
    still exists on disk when its path is yielded. The statement after the
    ``yield`` removes it, tolerating the case where it is already gone.
    """
    scratch = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with scratch:
        location = scratch.name
    yield location
    Path(location).unlink(missing_ok=True)


@pytest.fixture
def artifact_service(temp_db):
    """Build an ArtifactService on a SQLite repository opened at ``temp_db``."""
    return ArtifactService(SQLiteArtifactRepository(temp_db))


@pytest.fixture
def analyzer():
    """Provide a default-constructed TemplateAnalyzer."""
    template_analyzer = TemplateAnalyzer()
    return template_analyzer


@pytest.fixture
def resolver(artifact_service):
    """Provide a PromptResolver wired to the artifact service.

    The resolver is given a default-constructed MultiSpaceResolutionStrategy.
    """
    return PromptResolver(artifact_service, MultiSpaceResolutionStrategy())


@pytest.fixture
def compiler():
    """Provide a default-constructed ContextCompiler."""
    context_compiler = ContextCompiler()
    return context_compiler


@pytest.fixture
def mock_llm():
    """Provide a MockLLMAdapter configured with a fixed mock response."""
    canned_reply = "Mock LLM output"
    return MockLLMAdapter(mock_response=canned_reply)


@pytest.fixture
def engine(artifact_service, analyzer, resolver, compiler, mock_llm):
    """Assemble a PromptExecutionEngine from the other fixtures.

    The collaborators are passed positionally in the order: artifact service,
    analyzer, resolver, compiler, LLM adapter.
    """
    collaborators = (artifact_service, analyzer, resolver, compiler, mock_llm)
    return PromptExecutionEngine(*collaborators)


class TestPromptExecutionEngine:
    """Tests for PromptExecutionEngine.

    All tests operate inside one artifact space (``SPACE_ID``). The private
    helpers below hold the setup every test needs: storing an artifact,
    building a template, and building a resolution config. Their names do not
    start with ``test`` so pytest does not collect them.
    """

    # Space identifier shared by every artifact, template and config here.
    SPACE_ID = "space-1"

    # ------------------------------------------------------------------
    # Shared setup helpers
    # ------------------------------------------------------------------

    @classmethod
    def _add_artifact(cls, artifact_service, name="dep", content="Dependency"):
        """Create an artifact in the test space.

        Args:
            artifact_service: Service whose ``create_artifact`` is called.
            name: Artifact name; templates refer to it via ``{{require:<name>}}``.
            content: Artifact content.

        Returns:
            Whatever ``artifact_service.create_artifact`` returns.
        """
        return artifact_service.create_artifact(
            space_id=cls.SPACE_ID,
            name=name,
            content=content,
        )

    @classmethod
    def _make_template(cls, content, name="test"):
        """Build a PromptTemplate in the test space from ``content``."""
        return PromptTemplate.create(
            space_id=cls.SPACE_ID,
            name=name,
            content=content,
        )

    @classmethod
    def _resolution_config(cls):
        """Build a ResolutionConfig scoped to the test space."""
        return ResolutionConfig(space_id=cls.SPACE_ID)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_execute_simple_template(
        self, engine, artifact_service, mock_llm
    ):
        """Test executing simple template."""
        # The template requires "intro", so it must exist before execution.
        self._add_artifact(
            artifact_service, name="intro", content="Introduction text"
        )

        content = "# Document\n{{require:intro}}\nMore content"
        template = self._make_template(content, name="doc")

        run = engine.execute(template, content, self._resolution_config())

        # Verify run
        assert run.status == RunStatus.SUCCESS
        assert run.stage == ExecutionStage.COMPLETE
        assert run.completed_at is not None
        assert "output_artifact_id" in run.metadata

        # Verify LLM was called
        assert mock_llm.call_count == 1
        assert mock_llm.last_prompt is not None

    def test_execute_with_failed_resolution(self, engine):
        """Test execution with missing required dependency."""
        # No artifact named "missing-dep" is ever created.
        content = "{{require:missing-dep}}"
        template = self._make_template(content)

        run = engine.execute(template, content, self._resolution_config())

        # Should fail during resolution
        assert run.status == RunStatus.FAILED
        # Guard first so a missing message fails as an assertion, not a
        # TypeError from the ``in`` check below.
        assert run.error_message is not None
        assert "Resolution failed" in run.error_message

    def test_idempotent_execution_skips_duplicate(
        self, engine, artifact_service
    ):
        """Test idempotent execution (FR-4.4)."""
        self._add_artifact(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        # The same template, resolution config and run config objects are
        # reused for both executions so the inputs are identical.
        resolution_config = self._resolution_config()
        config = RunConfig(skip_if_exists=True)

        # First execution
        run1 = engine.execute(template, content, resolution_config, config)
        assert run1.status == RunStatus.SUCCESS

        # Second execution with same inputs
        run2 = engine.execute(template, content, resolution_config, config)

        # Should be skipped
        assert run2.status == RunStatus.SKIPPED
        assert run2.id != run1.id  # Different run
        assert run2.input_bundle_hash == run1.input_bundle_hash  # Same hash
        assert "skipped_due_to" in run2.metadata

    def test_execution_without_skip_reruns(
        self, engine, artifact_service
    ):
        """Test execution without skip flag reruns."""
        self._add_artifact(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        config = RunConfig(skip_if_exists=False)

        # First execution
        run1 = engine.execute(template, content, resolution_config, config)
        assert run1.status == RunStatus.SUCCESS

        # Second execution
        run2 = engine.execute(template, content, resolution_config, config)

        # Should execute again (not skipped)
        assert run2.status == RunStatus.SUCCESS
        assert run2.id != run1.id

    def test_input_bundle_hash_changes_with_template(
        self, engine, artifact_service
    ):
        """Test hash changes when template changes."""
        self._add_artifact(artifact_service)
        resolution_config = self._resolution_config()

        # First template
        content1 = "{{require:dep}} version 1"
        template1 = self._make_template(content1)
        run1 = engine.execute(template1, content1, resolution_config)

        # Different template content
        content2 = "{{require:dep}} version 2"
        template2 = self._make_template(content2)
        run2 = engine.execute(template2, content2, resolution_config)

        # Different hashes, both execute
        assert run1.input_bundle_hash != run2.input_bundle_hash
        assert run1.status == RunStatus.SUCCESS
        assert run2.status == RunStatus.SUCCESS

    def test_input_bundle_hash_changes_with_dependencies(
        self, engine, artifact_service
    ):
        """Test hash changes when dependency content changes."""
        # Keep the returned artifact: its id is needed for the update below.
        dep = self._add_artifact(artifact_service, content="Original content")

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        run1 = engine.execute(template, content, resolution_config)

        # Update dependency
        artifact_service.update_artifact_content(dep.id, "Modified content")

        run2 = engine.execute(template, content, resolution_config)

        # Hashes should differ
        assert run1.input_bundle_hash != run2.input_bundle_hash

    def test_execution_creates_manifest(
        self, engine, artifact_service
    ):
        """Test execution creates RunManifest."""
        self._add_artifact(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        run = engine.execute(template, content, self._resolution_config())

        # Check manifest in metadata
        assert "manifest" in run.metadata
        manifest_data = run.metadata["manifest"]
        assert manifest_data["run_id"] == run.id
        assert len(manifest_data["resolved_inputs"]) == 1
        assert len(manifest_data["output_artifacts"]) == 1
        assert "timing_metadata" in manifest_data

    def test_execution_with_llm_error(
        self, artifact_service, analyzer, resolver, compiler
    ):
        """Test execution handles LLM errors."""
        # Build a dedicated engine around the error adapter instead of using
        # the ``engine`` fixture, which is wired to the mock adapter.
        error_llm = ErrorLLMAdapter("LLM service unavailable")
        engine = PromptExecutionEngine(
            artifact_service,
            analyzer,
            resolver,
            compiler,
            error_llm,
        )

        self._add_artifact(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        run = engine.execute(template, content, self._resolution_config())

        # Should fail during processing
        assert run.status == RunStatus.FAILED
        # Guard first so a missing message fails as an assertion, not a
        # TypeError from the ``in`` check below.
        assert run.error_message is not None
        assert "LLM service unavailable" in run.error_message

    def test_get_run_by_hash(self, engine, artifact_service):
        """Test retrieving cached run by hash."""
        self._add_artifact(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        run = engine.execute(template, content, self._resolution_config())

        # Retrieve by hash
        cached = engine.get_run_by_hash(run.input_bundle_hash)
        assert cached is not None
        assert cached.id == run.id

    def test_clear_cache(self, engine, artifact_service):
        """Test clearing run cache."""
        self._add_artifact(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        run = engine.execute(template, content, self._resolution_config())

        # Cache should have run
        assert engine.get_run_by_hash(run.input_bundle_hash) is not None

        # Clear cache
        engine.clear_cache()

        # Should be gone
        assert engine.get_run_by_hash(run.input_bundle_hash) is None