Add utility-relationship understanding of INTENT.md vs. SCOPE.md, plus documentation.

2026-05-02 17:33:58 +02:00
parent 11c5beba58
commit d9df4da12a
7 changed files with 311 additions and 28 deletions
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -74,6 +74,7 @@ class CandidateGraphGenerator:
        credential_configs = self._facts(facts, "credential_config")
        provider_registries = self._facts(facts, "provider_registry")
        fallback_policies = self._facts(facts, "fallback_policy")
+        intent_facts = self._facts(facts, "intent")
        ability_primary_class, ability_attributes = self._ability_classification(
            repository,
            facts,
@@ -103,6 +104,9 @@ class CandidateGraphGenerator:
            capabilities.append(
                self._interface_capability(interfaces, tests, examples, docs, chunks)
            )
+        capabilities.extend(
+            self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
+        )
        promotable_llm_providers = self._promotable_llm_facts(llm_providers)
        promotable_provider_registries = self._promotable_llm_facts(provider_registries)
        promotable_fallback_policies = self._promotable_llm_facts(fallback_policies)
@@ -139,11 +143,11 @@ class CandidateGraphGenerator:
                        languages=languages,
                        docs=docs,
                    ),
-                    source_refs=self._source_refs(manifests + frameworks + languages),
-                    primary_class="repository-structure",
-                    attributes=self._structure_attributes(
-                        manifests,
-                        frameworks,
+            source_refs=self._source_refs(manifests + frameworks + languages),
+            primary_class="repository-structure",
+            attributes=self._structure_attributes(
+                manifests,
+                frameworks,
                        languages,
                    ),
                    evidence=self._evidence(tests, examples, docs),
@@ -284,6 +288,91 @@ class CandidateGraphGenerator:
            evidence=self._evidence(tests, examples, docs),
        )

+    def _intent_capabilities(  # Build one reviewable capability draft per unique item found in intent chunks.
+        self,
+        intent_facts: list[ObservedFact],
+        chunks: list[ContentChunk],
+        tests: list[ObservedFact],
+        examples: list[ObservedFact],
+        docs: list[ObservedFact],
+    ) -> list[CandidateCapabilityDraft]:
+        intent_chunks = [  # keep only kind == "intent" chunks that are tagged intent_summary or whose path ends in intent.md
+            chunk
+            for chunk in chunks
+            if chunk.kind == "intent"
+            and (
+                chunk.metadata.get("source_role") == "intent_summary"
+                or chunk.path.lower().endswith("intent.md")
+            )
+        ]
+        if not intent_chunks:  # no qualifying intent chunk -> no drafts at all
+            return []
+        source_refs = self._source_refs(intent_facts)  # computed once from the intent facts and shared by every draft below
+        capabilities: list[CandidateCapabilityDraft] = []
+        seen: set[str] = set()  # lower-cased names already emitted, for case-insensitive de-duplication
+        for item in self._intent_capability_items(intent_chunks):
+            name = self._intent_capability_name(item)
+            key = name.lower()
+            if not name or key in seen:  # skip items that yield no name and repeats of an earlier name
+                continue
+            seen.add(key)
+            capabilities.append(
+                CandidateCapabilityDraft(
+                    name=name,
+                    description=(
+                        "Reviewable intended capability extracted from repository "
+                        f"intent: {item}"
+                    ),
+                    inputs=[],
+                    outputs=[name],  # the capability name is reused as its only output
+                    confidence=self._confidence(  # passes base 0.45 plus (weight, condition) pairs; how they combine is defined in _confidence (not shown here)
+                        0.45,
+                        [
+                            (0.15, bool(source_refs)),
+                            (0.10, bool(tests)),
+                            (0.05, bool(examples)),
+                            (0.05, bool(docs)),
+                        ],
+                    ),
+                    source_refs=source_refs,
+                    primary_class="intent-capability",
+                    attributes=[
+                        "intent-derived",
+                        "utility-owned",
+                        "review-required-intent",
+                    ],
+                    evidence=self._evidence(tests, examples, docs),
+                )
+            )
+        return capabilities
+
+    def _intent_capability_items(self, chunks: list[ContentChunk]) -> list[str]:  # Collect list-like lines found under headings whose text contains "capabilit".
+        items: list[str] = []
+        in_capability_section = False  # set before the chunk loop, so the flag carries over from one chunk to the next
+        for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):  # deterministic order: by path, then start line
+            for raw_line in chunk.text.splitlines():
+                line = raw_line.strip()
+                if not line:
+                    continue
+                if line.startswith("#"):  # any "#"-prefixed line is treated as a heading and re-evaluates the section flag
+                    heading = line.lstrip("#").strip().lower()
+                    in_capability_section = "capabilit" in heading  # substring test covers "capability" and "capabilities"
+                    continue
+                if not in_capability_section:
+                    continue
+                item = re.sub(r"^(?:[-*]|\d+[.)])\s+", "", line).strip()  # drop a leading "-", "*", "1." or "1)" marker followed by whitespace
+                item = re.sub(r"^(?:capability|intended capability)\s*:\s*", "", item, flags=re.I)  # drop a leading "capability:" / "intended capability:" label, case-insensitively
+                if item and item != line or raw_line.lstrip().startswith(("-", "*")):  # NOTE(review): binds as (item and item != line) or startswith(...), so "-"/"*" lines are appended even when item is empty or unchanged - confirm intended
+                    items.append(item)
+        return items
+
+    def _intent_capability_name(self, text: str) -> str:  # Derive a short name from the leading clause of an intent item; "" when nothing usable remains.
+        candidate = re.split(r"\s+-\s+|\s*:\s*|[.!?]\s+", text.strip(), maxsplit=1)[0]  # keep text before the first " - ", colon, or ./!/? followed by whitespace
+        candidate = candidate.strip(" .:-")  # trim leftover spaces, dots, colons and dashes from both ends
+        if not candidate:
+            return ""
+        return self._title_from_words(candidate.split()[:8])  # hands at most the first 8 whitespace-separated words to _title_from_words
+
    def _interface_features(
        self,
        interfaces: list[ObservedFact],
@@ -437,7 +526,7 @@ class CandidateGraphGenerator:
    def _interface_attributes(self, interfaces: list[ObservedFact]) -> list[str]:  # Build the attribute list for an interface capability.
        feature_types = {self._feature_type(fact) for fact in interfaces}  # a set, so each distinct feature type contributes once
        attributes = ["api" if item == "API" else "cli" if item == "CLI" else "callable" for item in feature_types]  # any type other than "API"/"CLI" maps to "callable"
-        return self._unique(["surface", *attributes])
+        return self._unique(["surface", *attributes, "utility-owned"])  # this change appends "utility-owned" to every interface attribute list

    def _feature_attributes(
        self,
@@ -467,6 +556,9 @@ class CandidateGraphGenerator:
                "manifest" if manifests else "",
                *[fact.name for fact in frameworks],
                *[fact.name for fact in languages],
+                "utility-dependency" if manifests or frameworks else "",
+                "utility-tooling" if languages and not (manifests or frameworks) else "",
+                "review-required-structural-context",
            ]
        )

--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -489,6 +489,8 @@ class RegistryService:
        graph = self.store.get_candidate_graph(repository_id, analysis_run_id)
        approved_count = 0
        skipped_count = 0
+        approved_reasons: list[str] = []
+        skipped_reasons: list[str] = []
        for ability in graph.abilities:
            if ability.status != "candidate":
                continue
@@ -497,11 +499,14 @@ class RegistryService:
                for capability in ability.capabilities
                if capability.status == "candidate"
            ]
-            safe_capabilities = [
-                capability
-                for capability in candidate_capabilities
-                if self._trusted_auto_approve_capability_safe(capability)
-            ]
+            safe_capabilities = []
+            for capability in candidate_capabilities:
+                safe, reason = self._trusted_auto_approve_capability_decision(capability)
+                if safe:
+                    safe_capabilities.append(capability)
+                    approved_reasons.append(f"{capability.name}: {reason}")
+                else:
+                    skipped_reasons.append(f"{capability.name}: {reason}")
            skipped_count += len(candidate_capabilities) - len(safe_capabilities)
            if not safe_capabilities:
                continue
@@ -536,6 +541,7 @@ class RegistryService:
            notes=(
                f"{notes} Auto-approved {approved_count} safe candidate "
                f"capability(s); left {skipped_count} for review."
+                f"{self._trusted_auto_approve_notes(approved_reasons, skipped_reasons)}"
            ).strip(),
        )
        return self.store.get_ability_map(repository_id)
@@ -544,23 +550,64 @@ class RegistryService:
        self,
        capability: CandidateCapability,
    ) -> bool:
+        safe, _reason = self._trusted_auto_approve_capability_decision(capability)
+        return safe
+
+    def _trusted_auto_approve_capability_decision(  # Decide whether one candidate capability may be auto-approved; returns (safe, reason).
+        self,
+        capability: CandidateCapability,
+    ) -> tuple[bool, str]:
        has_source_refs = bool(capability.source_refs) or any(  # refs on the capability itself or on any of its features count
            feature.source_refs for feature in capability.features
        )
        if not has_source_refs:
-            return False
+            return False, "missing source references"
        if capability.primary_class == "repository-structure":
-            return False
+            return False, "structural/dependency context requires curator review"
+        utility_relationships = self._candidate_utility_relationships(capability)  # suffixes of the capability's "utility-*" attributes
+        eligible_relationships = {"owned", "facade", "adapter"}  # relationships that may pass this gate
+        if not utility_relationships:
+            return False, "missing utility relationship"
+        if not (utility_relationships & eligible_relationships):  # relationships are declared, but none of them is eligible
+            relationships = ", ".join(sorted(utility_relationships))
+            return False, f"utility relationship is not eligible ({relationships})"
        if capability.primary_class == "llm-integration":
-            return bool(
-                {"utility-owned", "utility-facade", "utility-adapter"}
-                & set(capability.attributes)
-            )
+            return True, "eligible LLM utility relationship with source support"  # no confidence threshold on this path; the relationship gate above already passed
        if capability.primary_class in {"interface", "API", "CLI", "callable", "api", "cli"}:
-            return capability.confidence >= 0.55
+            if capability.confidence >= 0.55:
+                return True, "owned interface with sufficient confidence"  # NOTE(review): text says "owned", but the gate above also admits facade/adapter - confirm wording
+            return False, "owned interface confidence below trusted threshold"
        if capability.features:
-            return capability.confidence >= 0.55
-        return capability.confidence >= 0.75
+            if capability.confidence >= 0.55:
+                return True, "eligible utility relationship with feature support"
+            return False, "feature-backed capability confidence below trusted threshold"
+        if capability.confidence >= 0.75:  # higher threshold when the capability has no features and matched no class branch above
+            return True, "eligible utility relationship with high confidence"
+        return False, "capability confidence below trusted threshold"
+
+    def _candidate_utility_relationships(  # Return the set of suffixes of every attribute that starts with "utility-".
+        self,
+        capability: CandidateCapability,
+    ) -> set[str]:
+        return {
+            attribute.removeprefix("utility-")  # e.g. "utility-owned" -> "owned"
+            for attribute in capability.attributes
+            if attribute.startswith("utility-")
+        }
+
+    def _trusted_auto_approve_notes(  # Format approved/skipped reasons as a text suffix; "" when both lists are empty.
+        self,
+        approved_reasons: list[str],
+        skipped_reasons: list[str],
+    ) -> str:
+        details: list[str] = []
+        if approved_reasons:
+            details.append("Approved: " + "; ".join(approved_reasons) + ".")  # reasons joined with "; " and closed with a period
+        if skipped_reasons:
+            details.append("Skipped: " + "; ".join(skipped_reasons) + ".")
+        if not details:  # nothing to report -> contribute no text
+            return ""
+        return " " + " ".join(details)  # result starts with a single space, followed by the space-joined sections

    def _approved_counts(self, repository_id: int) -> dict[str, int]:
        ability_map = self.store.get_ability_map(repository_id)