feat(prompts): implement Phase 6 - Incremental Execution (FR-7, FR-8)

Add change detection, structural diff-based impact analysis,
configurable-depth incremental recomputation with circular suppression,
and impact debt tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 13:18:27 +01:00
parent 9ce157400e
commit bd1d05ba79
13 changed files with 2446 additions and 0 deletions

View File

@@ -0,0 +1,166 @@
"""
Unit tests for ChangeDetector.
Tests change detection, recording, change types, and no-change cases.
"""
import pytest
import tempfile
from pathlib import Path
from markitect.prompts.models import Artifact, ArtifactType, calculate_content_digest
from markitect.prompts.incremental.detector import ChangeDetector
from markitect.prompts.incremental.models import ChangeType
@pytest.fixture
def temp_db():
    """Yield the path of a scratch ``.db`` file and delete it after the test.

    The file is created with ``delete=False`` so that only its path (a
    string) is handed to the test; removal happens here once the test is done.
    """
    scratch = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    scratch.close()
    scratch_path = scratch.name
    yield scratch_path
    # missing_ok: tolerate the file already being gone at teardown.
    Path(scratch_path).unlink(missing_ok=True)
@pytest.fixture
def detector(temp_db):
    """Provide a ChangeDetector constructed from the temporary database path."""
    change_detector = ChangeDetector(temp_db)
    return change_detector
def _make_artifact(content="original content"):
    """Build a CONTENT artifact named ``test-artifact`` in ``space-1``.

    Only the content varies between tests; the other fields are fixed.
    """
    artifact_fields = {
        "space_id": "space-1",
        "name": "test-artifact",
        "content": content,
        "artifact_type": ArtifactType.CONTENT,
    }
    return Artifact.create(**artifact_fields)
class TestDetectChange:
    """Checks on ``ChangeDetector.detect_change`` for modified content."""

    def test_detect_modification(self, detector):
        """A different content string yields a MODIFIED change record."""
        original = _make_artifact("original content")
        expected_digest = calculate_content_digest("modified content")

        detected = detector.detect_change(original, "modified content")

        assert detected is not None
        assert detected.artifact_id == original.id
        assert detected.old_digest == original.content_digest
        assert detected.new_digest == expected_digest
        assert detected.change_type == ChangeType.MODIFIED

    def test_no_change_returns_none(self, detector):
        """Identical content produces no change record at all."""
        unchanged = _make_artifact("same content")

        assert detector.detect_change(unchanged, "same content") is None

    def test_detect_whitespace_change(self, detector):
        """A trailing space alone is enough to count as a modification."""
        original = _make_artifact("content")

        detected = detector.detect_change(original, "content ")

        assert detected is not None
        assert detected.change_type == ChangeType.MODIFIED

    def test_detect_empty_to_content(self, detector):
        """Going from empty content to real content is a modification."""
        blank = _make_artifact("")

        detected = detector.detect_change(blank, "new content")

        assert detected is not None
        assert detected.change_type == ChangeType.MODIFIED
class TestDetectCreation:
    """Checks on ``ChangeDetector.detect_creation``."""

    def test_detect_creation(self, detector):
        """A creation record has no old digest and the CREATED type."""
        created = detector.detect_creation("artifact-123", "new content")

        assert created.artifact_id == "artifact-123"
        assert created.old_digest is None
        assert created.new_digest == calculate_content_digest("new content")
        assert created.change_type == ChangeType.CREATED

    def test_creation_has_unique_id(self, detector):
        """Two creation records never share an id, even for equal content."""
        first = detector.detect_creation("art-1", "content")
        second = detector.detect_creation("art-2", "content")

        assert first.id != second.id
class TestDetectDeletion:
    """Checks on ``ChangeDetector.detect_deletion``."""

    def test_detect_deletion(self, detector):
        """A deletion record carries the artifact's id and its last digest."""
        doomed = _make_artifact("content to delete")

        removal = detector.detect_deletion(doomed)

        assert removal.artifact_id == doomed.id
        assert removal.old_digest == doomed.content_digest
        assert removal.change_type == ChangeType.DELETED
class TestRecordChange:
    """Tests for persisting change records and querying them back.

    ``detect_change`` returns ``None`` when content is unchanged, so every
    test asserts the detected change is not ``None`` before recording it.
    That way a detection regression fails at the detection step rather than
    as an opaque error inside ``record_change``.
    """

    def test_record_and_retrieve(self, detector):
        """Test recording a change and retrieving it."""
        artifact = _make_artifact("original")
        change = detector.detect_change(artifact, "modified")
        assert change is not None

        detector.record_change(change)

        changes = detector.get_changes_for_artifact(artifact.id)
        assert len(changes) == 1
        assert changes[0].id == change.id
        assert changes[0].artifact_id == artifact.id
        assert changes[0].change_type == ChangeType.MODIFIED

    def test_record_multiple_changes(self, detector):
        """Test recording multiple changes for same artifact."""
        artifact = _make_artifact("v1")
        change1 = detector.detect_change(artifact, "v2")
        assert change1 is not None
        detector.record_change(change1)

        # Simulate artifact update so the second detection compares v2 -> v3.
        artifact.update_content("v2")
        change2 = detector.detect_change(artifact, "v3")
        assert change2 is not None
        detector.record_change(change2)

        changes = detector.get_changes_for_artifact(artifact.id)
        assert len(changes) == 2
        # Both distinct records must come back, not one record twice.
        # Compared as a set because the query's ordering is not shown here.
        assert {c.id for c in changes} == {change1.id, change2.id}

    def test_get_changes_by_type(self, detector):
        """Test filtering changes by type."""
        # Record a creation
        creation = detector.detect_creation("art-new", "content")
        detector.record_change(creation)

        # Record a modification
        artifact = _make_artifact("old")
        modification = detector.detect_change(artifact, "new")
        assert modification is not None
        detector.record_change(modification)

        created_changes = detector.get_changes_by_type(ChangeType.CREATED)
        assert len(created_changes) == 1
        assert created_changes[0].change_type == ChangeType.CREATED

        modified_changes = detector.get_changes_by_type(ChangeType.MODIFIED)
        assert len(modified_changes) == 1
        assert modified_changes[0].change_type == ChangeType.MODIFIED

    def test_no_changes_returns_empty(self, detector):
        """Test querying changes for artifact with none recorded."""
        changes = detector.get_changes_for_artifact("nonexistent")
        assert changes == []

View File

@@ -0,0 +1,162 @@
"""
Unit tests for ImpactAnalyzer and metrics functions.
Tests diff ratios, magnitude scoring, and threshold decisions.
"""
import pytest
from markitect.prompts.incremental.metrics import (
structural_diff_ratio,
line_diff_ratio,
calculate_change_magnitude,
)
from markitect.prompts.incremental.impact import ImpactAnalyzer
from markitect.prompts.incremental.models import RecomputeConfig
class TestStructuralDiffRatio:
    """Checks on ``structural_diff_ratio``."""

    def test_identical_content(self):
        """Equal strings have a ratio of exactly 0.0."""
        unchanged = structural_diff_ratio("hello", "hello")
        assert unchanged == 0.0

    def test_completely_different(self):
        """Strings sharing no characters score above 0.5."""
        assert structural_diff_ratio("aaa", "zzz") > 0.5

    def test_empty_strings(self):
        """Two empty strings count as identical (0.0)."""
        both_empty = structural_diff_ratio("", "")
        assert both_empty == 0.0

    def test_one_empty(self):
        """Empty versus non-empty is a total change (1.0) in either direction."""
        from_empty = structural_diff_ratio("", "content")
        assert from_empty == 1.0
        to_empty = structural_diff_ratio("content", "")
        assert to_empty == 1.0

    def test_small_change(self):
        """Swapping one word in a sentence gives a ratio strictly in (0, 0.5)."""
        before = "The quick brown fox jumps over the lazy dog"
        after = "The quick brown fox leaps over the lazy dog"

        assert 0.0 < structural_diff_ratio(before, after) < 0.5

    def test_returns_float(self):
        """The result is a float within the closed interval [0, 1]."""
        value = structural_diff_ratio("abc", "abd")

        assert isinstance(value, float)
        assert 0.0 <= value <= 1.0
class TestLineDiffRatio:
    """Checks on ``line_diff_ratio``."""

    def test_identical_lines(self):
        """The same multi-line text compared with itself gives 0.0."""
        text = "line1\nline2\nline3"
        unchanged = line_diff_ratio(text, text)
        assert unchanged == 0.0

    def test_one_line_changed(self):
        """Changing one of three lines gives a ratio strictly in (0, 1)."""
        before = "line1\nline2\nline3"
        after = "line1\nmodified\nline3"

        assert 0.0 < line_diff_ratio(before, after) < 1.0

    def test_all_lines_changed(self):
        """Replacing every line scores above 0.5."""
        before = "aaa\nbbb\nccc"
        after = "xxx\nyyy\nzzz"

        assert line_diff_ratio(before, after) > 0.5

    def test_empty_strings(self):
        """Two empty strings count as identical (0.0)."""
        both_empty = line_diff_ratio("", "")
        assert both_empty == 0.0

    def test_one_empty(self):
        """Empty versus non-empty is a total change (1.0) in either direction."""
        from_empty = line_diff_ratio("", "content")
        assert from_empty == 1.0
        to_empty = line_diff_ratio("content", "")
        assert to_empty == 1.0
class TestCalculateChangeMagnitude:
    """Checks on ``calculate_change_magnitude``."""

    def test_none_old_content(self):
        """A missing old side (creation) is a full-magnitude change."""
        creation = calculate_change_magnitude(None, "new content")
        assert creation == 1.0

    def test_none_new_content(self):
        """A missing new side (deletion) is a full-magnitude change."""
        deletion = calculate_change_magnitude("old content", None)
        assert deletion == 1.0

    def test_both_none(self):
        """Nothing on either side means no change (0.0)."""
        nothing = calculate_change_magnitude(None, None)
        assert nothing == 0.0

    def test_structural_method(self):
        """``method="structural"`` gives a partial magnitude for a one-char edit."""
        magnitude = calculate_change_magnitude("abc", "abd", method="structural")
        assert 0.0 < magnitude < 1.0

    def test_line_method(self):
        """``method="line"`` gives a partial magnitude when one of two lines changes."""
        magnitude = calculate_change_magnitude("abc\ndef", "abc\nxyz", method="line")
        assert 0.0 < magnitude < 1.0

    def test_identical_content(self):
        """Equal content has zero magnitude."""
        unchanged = calculate_change_magnitude("same", "same")
        assert unchanged == 0.0
class TestImpactAnalyzer:
    """Checks on ``ImpactAnalyzer`` magnitude and threshold decisions."""

    @pytest.fixture
    def analyzer(self):
        """Provide a default-constructed ImpactAnalyzer."""
        instance = ImpactAnalyzer()
        return instance

    def test_calculate_magnitude(self, analyzer):
        """Magnitude is a float within [0, 1]."""
        magnitude = analyzer.calculate_magnitude("old", "new")

        assert isinstance(magnitude, float)
        assert 0.0 <= magnitude <= 1.0

    def test_calculate_magnitude_creation(self, analyzer):
        """A missing old side (creation) has magnitude 1.0."""
        creation = analyzer.calculate_magnitude(None, "new")
        assert creation == 1.0

    def test_calculate_magnitude_identical(self, analyzer):
        """Equal content has magnitude 0.0."""
        unchanged = analyzer.calculate_magnitude("same", "same")
        assert unchanged == 0.0

    def test_should_recompute_above_threshold(self, analyzer):
        """A magnitude above the threshold triggers a recompute."""
        low_bar = RecomputeConfig(impact_threshold=0.3)
        assert analyzer.should_recompute(0.5, low_bar) is True

    def test_should_recompute_at_threshold(self, analyzer):
        """A magnitude exactly at the threshold still triggers a recompute."""
        exact_bar = RecomputeConfig(impact_threshold=0.5)
        assert analyzer.should_recompute(0.5, exact_bar) is True

    def test_should_not_recompute_below_threshold(self, analyzer):
        """A magnitude below the threshold does not trigger a recompute."""
        mid_bar = RecomputeConfig(impact_threshold=0.5)
        assert analyzer.should_recompute(0.3, mid_bar) is False

    def test_zero_threshold_always_recomputes(self, analyzer):
        """With a zero threshold even a zero-magnitude change recomputes."""
        no_bar = RecomputeConfig(impact_threshold=0.0)

        assert analyzer.should_recompute(0.0, no_bar) is True
        assert analyzer.should_recompute(0.01, no_bar) is True

    def test_high_threshold_only_major_changes(self, analyzer):
        """A 0.9 threshold rejects 0.5 but accepts 0.95."""
        high_bar = RecomputeConfig(impact_threshold=0.9)

        assert analyzer.should_recompute(0.5, high_bar) is False
        assert analyzer.should_recompute(0.95, high_bar) is True

View File

@@ -0,0 +1,364 @@
"""
Unit tests for IncrementalExecutionEngine.
Tests recompute flow, depth control, circular suppression, and budget limits.
"""
import pytest
import tempfile
from pathlib import Path
from unittest.mock import MagicMock
from markitect.prompts.dependencies.models import DependencyEdge, EdgeType
from markitect.prompts.dependencies.repository import SQLiteDependencyRepository
from markitect.prompts.dependencies.queries import DependencyQueryService
from markitect.prompts.execution.models import PromptRun, RunConfig, RunStatus
from markitect.prompts.incremental.engine import IncrementalExecutionEngine
from markitect.prompts.incremental.models import (
ArtifactChange,
ChangeType,
ImpactDebt,
RecomputeConfig,
)
@pytest.fixture
def temp_db():
    """Yield the path of a scratch ``.db`` file and delete it after the test.

    The file is created with ``delete=False`` so that only its path (a
    string) is handed to the test; removal happens here once the test is done.
    """
    scratch = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    scratch.close()
    scratch_path = scratch.name
    yield scratch_path
    # missing_ok: tolerate the file already being gone at teardown.
    Path(scratch_path).unlink(missing_ok=True)
@pytest.fixture
def dep_repo(temp_db):
    """Provide a SQLiteDependencyRepository built on the temporary database path."""
    repository = SQLiteDependencyRepository(temp_db)
    return repository
@pytest.fixture
def query_service(dep_repo):
    """Provide a DependencyQueryService wrapping the test repository."""
    service = DependencyQueryService(dep_repo)
    return service
@pytest.fixture
def engine(temp_db, query_service):
    """Provide an IncrementalExecutionEngine built from the temporary database path and query service."""
    execution_engine = IncrementalExecutionEngine(temp_db, query_service)
    return execution_engine
def _create_edge(repo, src, tgt, run_id="run-1", edge_type=EdgeType.REQUIRES):
    """Create a dependency edge from ``src`` to ``tgt`` and persist it via ``repo``.

    Returns whatever ``repo.create`` returns for the new edge.
    """
    new_edge = DependencyEdge.create(
        source_artifact_id=src,
        target_artifact_id=tgt,
        run_id=run_id,
        edge_type=edge_type,
    )
    persisted = repo.create(new_edge)
    return persisted
def _make_change(artifact_id="art-1"):
    """Build a MODIFIED ArtifactChange with placeholder digests for ``artifact_id``."""
    change_fields = {
        "artifact_id": artifact_id,
        "old_digest": "old-digest",
        "new_digest": "new-digest",
        "change_type": ChangeType.MODIFIED,
    }
    return ArtifactChange.create(**change_fields)
class TestFindDependentsAtDepth:
    """Checks on ``find_dependents_at_depth`` with varying depth limits.

    In the graph sketches below ``X -> Y`` means X depends on Y, i.e. an edge
    created with source X and target Y.
    """

    def test_depth_1_direct_only(self, dep_repo, engine):
        """With max_depth=1 only the direct dependent is returned."""
        # Chain: A -> B -> C
        for source, target in (("A", "B"), ("B", "C")):
            _create_edge(dep_repo, source, target)

        # Only B sits one step above C.
        found = engine.find_dependents_at_depth("C", max_depth=1)

        assert found == {"B"}

    def test_depth_2_transitive(self, dep_repo, engine):
        """With max_depth=2 the dependent's own dependent is included."""
        # Chain: A -> B -> C
        for source, target in (("A", "B"), ("B", "C")):
            _create_edge(dep_repo, source, target)

        # B is one step above C, A is two steps above.
        found = engine.find_dependents_at_depth("C", max_depth=2)

        assert found == {"A", "B"}

    def test_depth_0_returns_empty(self, dep_repo, engine):
        """With max_depth=0 nothing is returned even if dependents exist."""
        _create_edge(dep_repo, "A", "B")

        found = engine.find_dependents_at_depth("B", max_depth=0)

        assert found == set()

    def test_no_dependents(self, engine):
        """An artifact nobody depends on yields an empty set."""
        found = engine.find_dependents_at_depth("isolated", max_depth=5)

        assert found == set()

    def test_diamond_dependents(self, dep_repo, engine):
        """In a diamond graph the shared top node appears once in the result."""
        # Diamond: A -> C, B -> C, D -> A, D -> B
        diamond = (("A", "C"), ("B", "C"), ("D", "A"), ("D", "B"))
        for source, target in diamond:
            _create_edge(dep_repo, source, target)

        found = engine.find_dependents_at_depth("C", max_depth=2)

        assert found == {"A", "B", "D"}
class TestRecompute:
    """Checks on the ``recompute`` orchestration flow."""

    def test_basic_recompute(self, dep_repo, engine):
        """One dependent plus an execution callback gives one recompute."""
        _create_edge(dep_repo, "A", "B")
        modified = _make_change("B")
        canned_run = PromptRun.create(
            template_id="template-1",
            input_bundle_hash="hash-1",
        )

        def callback(run_id):
            # Always hand back the same pre-built run, whatever is requested.
            return canned_run

        depth_one = RecomputeConfig(max_depth=1)
        outcome = engine.recompute(
            modified,
            config=depth_one,
            execution_callback=callback,
            old_content="old",
            new_content="new",
        )

        assert outcome.changed_artifact_id == "B"
        assert outcome.total_dependents == 1
        assert outcome.recomputed_count == 1
        assert outcome.suppressed_count == 0
        assert len(outcome.executed_run_ids) == 1

    def test_dry_run_no_callback(self, dep_repo, engine):
        """Without a callback the dependent's id is recorded as executed."""
        _create_edge(dep_repo, "A", "B")
        modified = _make_change("B")

        depth_one = RecomputeConfig(max_depth=1)
        outcome = engine.recompute(
            modified,
            config=depth_one,
            old_content="old",
            new_content="new",
        )

        assert outcome.recomputed_count == 1
        assert outcome.executed_run_ids == ["A"]

    def test_no_dependents(self, engine):
        """A change to an artifact nobody depends on does nothing."""
        outcome = engine.recompute(_make_change("isolated"))

        assert outcome.total_dependents == 0
        assert outcome.recomputed_count == 0
        assert outcome.suppressed_count == 0

    def test_depth_control(self, dep_repo, engine):
        """Raising max_depth widens the set of recomputed dependents."""
        # Chain: A -> B -> C (A depends on B, B depends on C)
        for source, target in (("A", "B"), ("B", "C")):
            _create_edge(dep_repo, source, target)
        modified = _make_change("C")

        # One level up from C reaches only B.
        shallow = engine.recompute(
            modified,
            config=RecomputeConfig(max_depth=1),
            old_content="old",
            new_content="new",
        )
        assert shallow.total_dependents == 1
        assert shallow.recomputed_count == 1

        # Two levels up from C reaches B and A.
        deep = engine.recompute(
            modified,
            config=RecomputeConfig(max_depth=2),
            old_content="old",
            new_content="new",
        )
        assert deep.total_dependents == 2
        assert deep.recomputed_count == 2
class TestBudgetLimits:
    """Checks on ``max_recomputes`` budget handling in ``recompute``."""

    def test_budget_exhaustion(self, dep_repo, engine):
        """With five dependents and a budget of three, two are suppressed."""
        # Five artifacts that each depend directly on C.
        for i in range(5):
            _create_edge(dep_repo, f"dep-{i}", "C")
        modified = _make_change("C")

        capped = RecomputeConfig(max_depth=1, max_recomputes=3)
        outcome = engine.recompute(
            modified,
            config=capped,
            old_content="old",
            new_content="new",
        )

        assert outcome.total_dependents == 5
        assert outcome.recomputed_count == 3
        assert outcome.suppressed_count == 2
        assert all(
            debt.suppression_reason == "budget_exhausted"
            for debt in outcome.suppressed
        )

    def test_budget_zero_suppresses_all(self, dep_repo, engine):
        """A budget of zero recomputes nothing and suppresses the dependent."""
        _create_edge(dep_repo, "A", "B")
        modified = _make_change("B")

        no_budget = RecomputeConfig(max_depth=1, max_recomputes=0)
        outcome = engine.recompute(
            modified,
            config=no_budget,
            old_content="old",
            new_content="new",
        )

        assert outcome.recomputed_count == 0
        assert outcome.suppressed_count == 1
class TestCircularSuppression:
    """Checks on the ``suppress_circular`` switch of ``recompute``."""

    def test_circular_dependency_suppressed(self, dep_repo, engine):
        """A mutual dependency is suppressed when suppress_circular is on."""
        # Two-node cycle: A -> B and B -> A
        for source, target in (("A", "B"), ("B", "A")):
            _create_edge(dep_repo, source, target)
        modified = _make_change("B")

        guarded = RecomputeConfig(max_depth=1, suppress_circular=True)
        outcome = engine.recompute(
            modified,
            config=guarded,
            old_content="old",
            new_content="new",
        )

        # A is the single dependent of B ...
        assert outcome.total_dependents == 1
        # ... but B also depends on A, so recomputing A would loop back.
        assert outcome.suppressed_count == 1
        assert outcome.suppressed[0].suppression_reason == "circular_dependency"

    def test_circular_suppression_disabled(self, dep_repo, engine):
        """With suppress_circular off the circular dependent is recomputed."""
        for source, target in (("A", "B"), ("B", "A")):
            _create_edge(dep_repo, source, target)
        modified = _make_change("B")

        unguarded = RecomputeConfig(max_depth=1, suppress_circular=False)
        outcome = engine.recompute(
            modified,
            config=unguarded,
            old_content="old",
            new_content="new",
        )

        # Nothing is held back when the cycle check is switched off.
        assert outcome.recomputed_count == 1
        assert outcome.suppressed_count == 0
class TestThresholdSuppression:
    """Checks on ``impact_threshold`` handling in ``recompute``."""

    def test_below_threshold_suppressed(self, dep_repo, engine):
        """A one-character edit under a 0.9 threshold is suppressed."""
        _create_edge(dep_repo, "A", "B")
        modified = _make_change("B")

        # Strict threshold combined with a case-only edit of one letter.
        strict = RecomputeConfig(max_depth=1, impact_threshold=0.9)
        outcome = engine.recompute(
            modified,
            config=strict,
            old_content="hello world",
            new_content="hello World",
        )

        assert outcome.suppressed_count == 1
        assert outcome.suppressed[0].suppression_reason == "below_threshold"

    def test_above_threshold_recomputed(self, dep_repo, engine):
        """A near-total rewrite over a 0.1 threshold is recomputed."""
        _create_edge(dep_repo, "A", "B")
        modified = _make_change("B")

        lenient = RecomputeConfig(max_depth=1, impact_threshold=0.1)
        outcome = engine.recompute(
            modified,
            config=lenient,
            old_content="completely old content here",
            new_content="entirely new different stuff",
        )

        assert outcome.recomputed_count == 1
        assert outcome.suppressed_count == 0
class TestDebtPersistence:
    """Checks that suppressed recomputes can be queried back as impact debt."""

    def test_debt_recorded_in_db(self, dep_repo, engine):
        """A circular suppression shows up as one debt record for the changed artifact."""
        # Two-node cycle: A -> B and B -> A
        for source, target in (("A", "B"), ("B", "A")):
            _create_edge(dep_repo, source, target)

        guarded = RecomputeConfig(max_depth=1, suppress_circular=True)
        engine.recompute(
            _make_change("B"),
            config=guarded,
            old_content="old",
            new_content="new",
        )

        recorded = engine.get_debt_for_artifact("B")
        assert len(recorded) == 1
        assert recorded[0].suppression_reason == "circular_dependency"

    def test_get_all_debt(self, dep_repo, engine):
        """Two independent suppressions yield two debt records overall."""
        # Two disjoint two-node cycles: A <-> B and C <-> D.
        cycles = (("A", "B"), ("B", "A"), ("C", "D"), ("D", "C"))
        for source, target in cycles:
            _create_edge(dep_repo, source, target)

        # One change per cycle, each recomputed with its own fresh config.
        for changed_id in ("B", "D"):
            engine.recompute(
                _make_change(changed_id),
                config=RecomputeConfig(max_depth=1, suppress_circular=True),
                old_content="old",
                new_content="new",
            )

        everything = engine.get_all_debt()
        assert len(everything) == 2