From 85766be5bc69642378476204178f43b629ea7f9b Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 3 May 2026 01:20:58 +0200 Subject: [PATCH] Build dependency graph and dependency aware propagation of changes --- docs/dependency-aware-scope-propagation.md | 63 ++++ docs/terminology.md | 12 + src/repo_registry/core/models.py | 48 +++ src/repo_registry/core/service.py | 302 ++++++++++++++++++ tests/test_registry_service.py | 123 +++++++ ...0008-dependency-aware-scope-propagation.md | 12 +- 6 files changed, 554 insertions(+), 6 deletions(-) create mode 100644 docs/dependency-aware-scope-propagation.md diff --git a/docs/dependency-aware-scope-propagation.md b/docs/dependency-aware-scope-propagation.md new file mode 100644 index 0000000..76c98b2 --- /dev/null +++ b/docs/dependency-aware-scope-propagation.md @@ -0,0 +1,63 @@ +# Dependency-Aware Scope Propagation + +Repository Scoping treats the approved map as a spreadsheet-like dependency +graph. Observed facts are input cells. Evidence and features are local formulas +or reviewable links. Capabilities, abilities, and scope are higher-level claims +that may be curator-owned, mixed, or derived from lower-level support. + +## Graph Model + +The graph is directed upward toward the single scope root: + +```text +observed fact -> evidence/feature -> capability -> ability -> scope +``` + +Each edge records: + +- dependency type, such as `observes`, `supports`, `realizes`, or `summarizes` +- strength, usually copied from evidence strength where available +- source, such as `source_ref`, `approved_evidence`, or + `approved_characteristic` +- target ownership: `deterministic`, `mixed`, or `curator_owned` +- same-layer flag, used when one capability points at another capability or a + similar normalization signal appears + +Same-layer edges are allowed because real repositories often contain cross-cutting +support. 
They should still be visible to reviewers because too many same-layer +edges can mean the abstraction levels need cleanup. + +## Impact Analysis + +Impact analysis compares two analysis runs, collects changed observed facts, and +walks the dependency graph upward. A changed dependency marks downstream items +`stale` and records a reason chain that explains the path from the fact to the +affected characteristic. + +Recommended actions are derived from ownership: + +- deterministic targets can be recalculated +- mixed and curator-owned targets require review before approved truth changes + +The current implementation exposes this through `RegistryService`: + +- `build_dependency_graph(repository_id)` +- `analyze_dependency_impact(repository_id, base_analysis_run_id, target_analysis_run_id)` + +The impact result includes changed fact keys, impacted items, reason chains, +maximum propagation depth, breadth, and whether the root scope was affected. + +## Metrics + +Propagation depth says how far a source change bubbled up. Propagation breadth +says how many approved items were touched. Scope-level churn is especially +important, but it should be interpreted cautiously: it can reflect healthy +product discovery, an overly broad scope statement, or missing intermediate +characteristics. + +## LLM Assistance + +Optional LLM-assisted updates can propose recalculated text for stale mixed or +curator-owned characteristics. Those proposals remain candidates until reviewed. +Deterministic scanners still own observed facts, and approved characteristics +remain the registry truth until a curator accepts a replacement. diff --git a/docs/terminology.md b/docs/terminology.md index 27c00ee..cd63ed8 100644 --- a/docs/terminology.md +++ b/docs/terminology.md @@ -93,6 +93,18 @@ normalization. than disappearing. - Classification: a main type plus optional additional attributes that help users filter and orient without forcing every item into a single rigid box. 
+- Dependency: a directed edge showing that one fact or characteristic affects + another. Edges record type, strength, source, ownership, and whether the edge + stays within the same layer. +- Staleness: a freshness state assigned when an upstream dependency changes and + a downstream characteristic may no longer be current. +- Recalculation: an automated refresh of derived content after upstream changes. + Deterministic content can be refreshed directly; mixed and curator-owned + claims need review before the new value becomes approved registry truth. +- Propagation rate: the breadth and depth of downstream impact from changed + inputs. High propagation can indicate rapid discovery, weak normalization, or + brittle conceptual boundaries; it is a signal for review, not a score by + itself. ## Extraction Philosophy diff --git a/src/repo_registry/core/models.py b/src/repo_registry/core/models.py index 40ebb12..7fec1cd 100644 --- a/src/repo_registry/core/models.py +++ b/src/repo_registry/core/models.py @@ -150,6 +150,54 @@ class SourceReference: line: int | None = None +@dataclass(frozen=True) +class DependencyEdge: + source_kind: str + source_id: int | None + source_key: str + target_kind: str + target_id: int + target_key: str + dependency_type: str + strength: str + source: str + target_ownership: str + same_layer: bool = False + + +@dataclass(frozen=True) +class DependencyGraph: + repository: Repository + scope: "Scope" + edges: list[DependencyEdge] + + +@dataclass(frozen=True) +class DependencyImpactItem: + item_kind: str + item_id: int + item_key: str + name: str + freshness_state: str + ownership: str + recommended_action: str + impact_depth: int + reasons: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class DependencyImpactAnalysis: + repository: Repository + base_run: AnalysisRun + target_run: AnalysisRun + changed_fact_keys: list[str] + impacts: list[DependencyImpactItem] + max_depth: int + scope_impacted: bool + propagation_breadth: int + graph: DependencyGraph 
def build_dependency_graph(self, repository_id: int) -> DependencyGraph:
    """Build the dependency graph for a repository's approved ability map.

    Every edge points from a supporting item to the item that depends on it:
    each ability ``summarizes`` into the scope, each capability ``realizes``
    its ability, and each capability contributes the feature, evidence and
    fact edges produced by ``_capability_dependency_edges``.

    Args:
        repository_id: Identifier used to load the repository and its
            ability map from the store.

    Returns:
        A ``DependencyGraph`` holding the repository, the map's scope, and
        the edges in traversal order.
    """
    repository = self.store.get_repository(repository_id)
    ability_map = self.store.get_ability_map(repository_id)
    scope = ability_map.scope
    scope_key = self._dependency_key("scope", scope.id)

    edges: list[DependencyEdge] = []
    for ability in ability_map.abilities:
        ability_key = self._dependency_key("ability", ability.id)
        edges.append(
            self._dependency_edge(
                source_kind="ability",
                source_id=ability.id,
                source_key=ability_key,
                target_kind="scope",
                target_id=scope.id,
                target_key=scope_key,
                dependency_type="summarizes",
                strength="strong",
                source="approved_characteristic",
            )
        )
        for capability in ability.capabilities:
            capability_key = self._dependency_key("capability", capability.id)
            edges.append(
                self._dependency_edge(
                    source_kind="capability",
                    source_id=capability.id,
                    source_key=capability_key,
                    target_kind="ability",
                    target_id=ability.id,
                    target_key=ability_key,
                    dependency_type="realizes",
                    strength="strong",
                    source="approved_characteristic",
                )
            )
            edges.extend(
                self._capability_dependency_edges(
                    capability,
                    capability_key=capability_key,
                )
            )
    return DependencyGraph(
        repository=repository,
        scope=scope,
        edges=edges,
    )

def analyze_dependency_impact(
    self,
    repository_id: int,
    base_analysis_run_id: int,
    target_analysis_run_id: int,
) -> DependencyImpactAnalysis:
    """Propagate fact changes between two analysis runs through the graph.

    The added, removed, changed and weakened facts from the run diff seed a
    breadth-first walk over the dependency graph. Every item reached is
    reported as ``stale`` together with the shortest depth at which it was
    reached and one reason chain per distinct incoming edge.

    Args:
        repository_id: Repository whose approved map is analysed.
        base_analysis_run_id: Run used as the "before" side of the diff.
        target_analysis_run_id: Run used as the "after" side of the diff.

    Returns:
        A ``DependencyImpactAnalysis`` with the changed fact keys, the
        impacted items sorted by (depth, kind, id), the maximum depth, the
        number of impacted items, whether the scope was reached, and the
        graph that was walked.
    """
    # Function-scope import so the block does not rely on file-level imports.
    from collections import deque

    diff = self.diff_analysis_runs(
        repository_id,
        base_analysis_run_id,
        target_analysis_run_id,
    )
    graph = self.build_dependency_graph(repository_id)
    changed_facts = [
        item
        for section in (
            diff.facts.added,
            diff.facts.removed,
            diff.facts.changed,
            diff.facts.weakened,
        )
        for item in section
    ]
    changed_fact_keys = [item.key for item in changed_facts]
    fact_reasons = {
        item.key: f"{item.change_type} fact {item.key}" for item in changed_facts
    }

    # Outgoing edges grouped by source key for O(1) neighbour lookup.
    adjacency: dict[str, list[DependencyEdge]] = {}
    for edge in graph.edges:
        adjacency.setdefault(edge.source_key, []).append(edge)

    # deque.popleft() is O(1); list.pop(0) shifts the whole list on each pop.
    queue: deque[tuple[str, int, str]] = deque(
        (key, 0, fact_reasons[key]) for key in changed_fact_keys
    )
    impacts_by_key: dict[str, DependencyImpactItem] = {}
    visited_edges: set[tuple[str, str]] = set()
    # Loaded at most once, and only if something is impacted, so display
    # names do not trigger one store read per impacted item.
    ability_map = None

    while queue:
        source_key, depth, inherited_reason = queue.popleft()
        for edge in adjacency.get(source_key, ()):
            edge_marker = (edge.source_key, edge.target_key)
            if edge_marker in visited_edges:
                # Each (source, target) pair is followed once; this also
                # terminates the walk if same-layer edges form a cycle.
                continue
            visited_edges.add(edge_marker)
            impact_depth = depth + 1
            reason = (
                f"{inherited_reason} -> {edge.target_kind} depends on "
                f"{edge.source_kind} via {edge.dependency_type}"
            )
            current = impacts_by_key.get(edge.target_key)
            if current is None:
                if ability_map is None:
                    ability_map = self.store.get_ability_map(repository_id)
                impacts_by_key[edge.target_key] = DependencyImpactItem(
                    item_kind=edge.target_kind,
                    item_id=edge.target_id,
                    item_key=edge.target_key,
                    name=self._dependency_display_name(
                        repository_id,
                        edge.target_kind,
                        edge.target_id,
                        ability_map=ability_map,
                    ),
                    freshness_state="stale",
                    ownership=edge.target_ownership,
                    recommended_action=self._recommended_action(
                        edge.target_ownership
                    ),
                    impact_depth=impact_depth,
                    reasons=[reason],
                )
            else:
                # Already impacted through another edge: keep the shortest
                # depth and record the additional reason chain.
                impacts_by_key[edge.target_key] = replace(
                    current,
                    impact_depth=min(current.impact_depth, impact_depth),
                    reasons=[*current.reasons, reason],
                )
            queue.append((edge.target_key, impact_depth, reason))

    impacts = sorted(
        impacts_by_key.values(),
        key=lambda item: (item.impact_depth, item.item_kind, item.item_id),
    )
    max_depth = max((item.impact_depth for item in impacts), default=0)
    return DependencyImpactAnalysis(
        repository=diff.repository,
        base_run=diff.base_run,
        target_run=diff.target_run,
        changed_fact_keys=changed_fact_keys,
        impacts=impacts,
        max_depth=max_depth,
        scope_impacted=any(item.item_kind == "scope" for item in impacts),
        propagation_breadth=len(impacts),
        graph=graph,
    )

def _capability_dependency_edges(
    self,
    capability,
    *,
    capability_key: str,
) -> list[DependencyEdge]:
    """Return the edges that feed into a single capability.

    For every feature and evidence item of ``capability`` this emits a
    ``supports`` edge into the capability plus one ``observes`` edge per
    source reference (fact -> feature / fact -> evidence). Evidence that
    references another feature, capability, ability or scope additionally
    emits a ``relates`` edge from the referenced item to the evidence
    target.

    Args:
        capability: Object exposing ``id``, ``features`` and ``evidence``.
        capability_key: Dependency key already computed for ``capability``.

    Returns:
        The edges in feature-first, then evidence, order.
    """
    edges: list[DependencyEdge] = []
    for feature in capability.features:
        feature_key = self._dependency_key("feature", feature.id)
        edges.append(
            self._dependency_edge(
                source_kind="feature",
                source_id=feature.id,
                source_key=feature_key,
                target_kind="capability",
                target_id=capability.id,
                target_key=capability_key,
                dependency_type="supports",
                strength="medium",
                source="approved_characteristic",
            )
        )
        for source_ref in feature.source_refs:
            edges.append(
                self._dependency_edge(
                    source_kind="fact",
                    source_id=source_ref.fact_id,
                    source_key=self._source_ref_fact_key(source_ref),
                    target_kind="feature",
                    target_id=feature.id,
                    target_key=feature_key,
                    dependency_type="observes",
                    strength="strong",
                    source="source_ref",
                )
            )
    for evidence in capability.evidence:
        evidence_key = self._dependency_key("evidence", evidence.id)
        strength = evidence.strength or "medium"
        edges.append(
            self._dependency_edge(
                source_kind="evidence",
                source_id=evidence.id,
                source_key=evidence_key,
                target_kind="capability",
                target_id=capability.id,
                target_key=capability_key,
                dependency_type="supports",
                strength=strength,
                source="approved_characteristic",
            )
        )
        for source_ref in evidence.source_refs:
            edges.append(
                self._dependency_edge(
                    source_kind="fact",
                    source_id=source_ref.fact_id,
                    source_key=self._source_ref_fact_key(source_ref),
                    target_kind="evidence",
                    target_id=evidence.id,
                    target_key=evidence_key,
                    dependency_type="observes",
                    strength=strength,
                    source="source_ref",
                )
            )
        if evidence.reference_kind not in {"feature", "capability", "ability", "scope"}:
            continue
        reference_id = evidence.reference_id
        if reference_id is None:
            continue
        # Fall back to the owning capability as a (kind, id) pair. Falling
        # back for the id alone could yield keys such as "None:<id>" or pair
        # a foreign kind with this capability's id.
        if evidence.target_kind and evidence.target_id:
            target_kind, target_id = evidence.target_kind, evidence.target_id
        else:
            target_kind, target_id = "capability", capability.id
        edges.append(
            self._dependency_edge(
                source_kind=evidence.reference_kind,
                source_id=reference_id,
                source_key=self._dependency_key(
                    evidence.reference_kind,
                    reference_id,
                ),
                target_kind=target_kind,
                target_id=target_id,
                target_key=self._dependency_key(target_kind, target_id),
                dependency_type="relates",
                strength=strength,
                source="approved_evidence",
            )
        )
    return edges

def _dependency_edge(
    self,
    *,
    source_kind: str,
    source_id: int | None,
    source_key: str,
    target_kind: str,
    target_id: int,
    target_key: str,
    dependency_type: str,
    strength: str,
    source: str,
) -> DependencyEdge:
    """Create a ``DependencyEdge`` and fill in its derived fields.

    ``target_ownership`` is derived from ``target_kind`` and ``same_layer``
    is set when source and target share the same kind.
    """
    return DependencyEdge(
        source_kind=source_kind,
        source_id=source_id,
        source_key=source_key,
        target_kind=target_kind,
        target_id=target_id,
        target_key=target_key,
        dependency_type=dependency_type,
        strength=strength,
        source=source,
        target_ownership=self._ownership_for_kind(target_kind),
        same_layer=source_kind == target_kind,
    )

def _dependency_key(self, kind: str, item_id: int) -> str:
    """Return the graph key ``"<kind>:<item_id>"`` for an approved item."""
    return f"{kind}:{item_id}"

def _source_ref_fact_key(self, source_ref) -> str:
    """Return the fact key ``"fact:<kind>:<path>:<name>"`` for a source ref."""
    return f"fact:{source_ref.kind}:{source_ref.path}:{source_ref.name}"

def _ownership_for_kind(self, kind: str) -> str:
    """Map an item kind to its ownership class.

    Facts are ``deterministic``; evidence, features and capabilities are
    ``mixed``; every other kind is ``curator_owned``.
    """
    if kind == "fact":
        return "deterministic"
    if kind in {"evidence", "feature", "capability"}:
        return "mixed"
    return "curator_owned"

def _recommended_action(self, ownership: str) -> str:
    """Return ``"recalculate"`` for deterministic items, else ``"review"``."""
    if ownership == "deterministic":
        return "recalculate"
    return "review"

def _dependency_display_name(
    self,
    repository_id: int,
    kind: str,
    item_id: int,
    *,
    ability_map=None,
) -> str:
    """Resolve a human-readable name for an item of the approved map.

    Args:
        repository_id: Repository whose ability map is searched.
        kind: One of ``scope``, ``ability``, ``capability``, ``feature`` or
            ``evidence``; other kinds are never matched.
        item_id: Identifier of the item within its kind.
        ability_map: Optional already-loaded ability map. When omitted the
            map is loaded from the store, so callers resolving many names
            can pass it to avoid repeated loads.

    Returns:
        The item's name (for evidence, its ``reference`` text), or
        ``"<kind>:<item_id>"`` when no matching item is found.
    """
    if ability_map is None:
        ability_map = self.store.get_ability_map(repository_id)
    if kind == "scope" and ability_map.scope.id == item_id:
        return ability_map.scope.name
    for ability in ability_map.abilities:
        if kind == "ability" and ability.id == item_id:
            return ability.name
        for capability in ability.capabilities:
            if kind == "capability" and capability.id == item_id:
                return capability.name
            for feature in capability.features:
                if kind == "feature" and feature.id == item_id:
                    return feature.name
            for evidence in capability.evidence:
                if kind == "evidence" and evidence.id == item_id:
                    return evidence.reference
    return f"{kind}:{item_id}"
def test_dependency_impact_propagates_changed_fact_to_scope(tmp_path):
    """Removing the Click requirement should mark every item up to the scope stale."""
    service = make_service(tmp_path)
    source = write_python_cli_repo(tmp_path)
    repository = service.register_repository(
        name="PyCLI",
        url=str(source),
        description="CLI command repository.",
    )
    repo_id = repository.id

    def run_analysis():
        # Both runs use the same deterministic (non-LLM) configuration.
        return service.analyze_repository(
            repo_id,
            source_path=str(source),
            use_llm_assistance=False,
        )

    base_summary = run_analysis()
    click_fact = next(
        fact
        for fact in base_summary.facts
        if fact.kind == "framework"
        and fact.path == "requirements.txt"
        and fact.name == "Click"
    )
    click_ref = SourceReference(
        fact_id=click_fact.id,
        path=click_fact.path,
        kind=click_fact.kind,
        name=click_fact.name,
    )

    # Approved map: ability -> capability -> {feature, evidence}, both of
    # which cite the Click fact.
    ability_id = service.add_ability(
        repo_id,
        name="Command Line Operations",
        description="Expose command line workflows.",
    )
    capability_id = service.add_capability(
        repo_id,
        ability_id,
        name="Click Command Execution",
        description="Run commands through Click.",
    )
    feature_id = service.store.create_feature(
        repo_id,
        capability_id,
        name="Click decorator",
        type="interface",
        location="cli.py",
        confidence=0.9,
        source_refs=[click_ref],
    )
    evidence_id = service.store.create_evidence(
        repo_id,
        capability_id,
        type="dependency",
        reference="Click dependency",
        strength="strong",
        source_refs=[click_ref],
    )

    # Drop Click from the requirements so the second run no longer sees it.
    requirements = source / "requirements.txt"
    requirements.write_text("typer\npytest\n", encoding="utf-8")
    target_summary = run_analysis()

    impact = service.analyze_dependency_impact(
        repo_id,
        base_summary.analysis_run.id,
        target_summary.analysis_run.id,
    )

    scope_id = service.store.get_ability_map(repo_id).scope.id
    impacted_keys = {item.item_key for item in impact.impacts}
    for expected_key in (
        f"feature:{feature_id}",
        f"evidence:{evidence_id}",
        f"capability:{capability_id}",
        f"ability:{ability_id}",
        f"scope:{scope_id}",
    ):
        assert expected_key in impacted_keys
    assert impact.scope_impacted is True
    assert impact.max_depth == 4
    all_reasons = [reason for item in impact.impacts for reason in item.reasons]
    assert any(
        "removed fact fact:framework:requirements.txt:Click" in reason
        for reason in all_reasons
    )
    assert all(item.freshness_state == "stale" for item in impact.impacts)


def test_dependency_graph_flags_same_layer_edges(tmp_path):
    """Evidence linking two capabilities should yield exactly one same-layer edge."""
    service = make_service(tmp_path)
    repository = service.register_repository(
        name="Same Layer",
        url="https://example.com/same-layer.git",
        description="Tests same-layer dependency normalization signals.",
    )
    repo_id = repository.id
    ability_id = service.add_ability(repo_id, name="Operations")
    first_capability_id = service.add_capability(
        repo_id,
        ability_id,
        name="Source Capability",
    )
    second_capability_id = service.add_capability(
        repo_id,
        ability_id,
        name="Target Capability",
    )
    service.store.create_evidence(
        repo_id,
        second_capability_id,
        type="relationship",
        reference="Target depends on source capability",
        strength="medium",
        target_kind="capability",
        target_id=second_capability_id,
        reference_kind="capability",
        reference_id=first_capability_id,
    )

    graph = service.build_dependency_graph(repo_id)

    same_layer_edges = [edge for edge in graph.edges if edge.same_layer]
    assert len(same_layer_edges) == 1
    only_edge = same_layer_edges[0]
    assert only_edge.source_key == f"capability:{first_capability_id}"
    assert only_edge.target_key == f"capability:{second_capability_id}"
b/workplans/RREG-WP-0008-dependency-aware-scope-propagation.md @@ -8,7 +8,7 @@ status: active owner: codex topic_slug: foerster-capabilities created: "2026-05-01" -updated: "2026-05-01" +updated: "2026-05-03" state_hub_workstream_id: "81ec50c4-fe73-41ea-ba5b-2e02be2bed42" --- @@ -42,7 +42,7 @@ changes stay local may have a more stable conceptual model. ```task id: RREG-WP-0008-T01 -status: todo +status: done priority: high state_hub_task_id: "a7a6080e-a78b-447b-a3a9-0a575f882578" ``` @@ -60,7 +60,7 @@ Acceptance criteria: ```task id: RREG-WP-0008-T02 -status: todo +status: done priority: high state_hub_task_id: "0f00ffaf-5685-4327-bb93-b5c78bc97bd1" ``` @@ -80,7 +80,7 @@ Acceptance criteria: ```task id: RREG-WP-0008-T03 -status: todo +status: in_progress priority: high state_hub_task_id: "29a85e0a-7203-484c-aa5d-94096ab695a9" ``` @@ -98,7 +98,7 @@ Acceptance criteria: ```task id: RREG-WP-0008-T04 -status: todo +status: done priority: medium state_hub_task_id: "99cab29a-d51a-4709-b08c-46414bab8053" ``` @@ -133,7 +133,7 @@ Acceptance criteria: ```task id: RREG-WP-0008-T06 -status: todo +status: done priority: medium state_hub_task_id: "44b0b992-da74-45b0-970a-2f3786ac3b31" ```