diff --git a/TODO.md b/TODO.md index 51c1c4d..6e39081 100644 --- a/TODO.md +++ b/TODO.md @@ -48,8 +48,8 @@ the Custodian State Hub. Read it at the start of each session. It documents: ## Active Workplan -`workplans/RREG-WP-0002-production-hardening.md` is the current active workplan. -Six tasks remain open (T01–T06). Start with T01 (P0: Update Safety and Change Review) +`workplans/RREG-WP-0004-characteristic-classification-navigation.md` is the +current active workplan. Start with T01 (P0: Characteristic Classification Fields) as the highest-priority item. --- diff --git a/migrations/0001_initial.sql b/migrations/0001_initial.sql index a1e7589..5cbaa90 100644 --- a/migrations/0001_initial.sql +++ b/migrations/0001_initial.sql @@ -62,6 +62,8 @@ CREATE TABLE IF NOT EXISTS candidate_abilities ( analysis_run_id INTEGER NOT NULL REFERENCES analysis_runs(id) ON DELETE CASCADE, name TEXT NOT NULL, description TEXT NOT NULL DEFAULT '', + primary_class TEXT NOT NULL DEFAULT 'ability', + attributes TEXT NOT NULL DEFAULT '[]', confidence REAL NOT NULL DEFAULT 0.0, status TEXT NOT NULL DEFAULT 'candidate', source_refs TEXT NOT NULL DEFAULT '[]', @@ -77,6 +79,8 @@ CREATE TABLE IF NOT EXISTS candidate_capabilities ( description TEXT NOT NULL DEFAULT '', inputs TEXT NOT NULL DEFAULT '[]', outputs TEXT NOT NULL DEFAULT '[]', + primary_class TEXT NOT NULL DEFAULT 'capability', + attributes TEXT NOT NULL DEFAULT '[]', confidence REAL NOT NULL DEFAULT 0.0, status TEXT NOT NULL DEFAULT 'candidate', source_refs TEXT NOT NULL DEFAULT '[]', @@ -90,6 +94,8 @@ CREATE TABLE IF NOT EXISTS candidate_features ( capability_id INTEGER NOT NULL REFERENCES candidate_capabilities(id) ON DELETE CASCADE, name TEXT NOT NULL, type TEXT NOT NULL, + primary_class TEXT NOT NULL DEFAULT '', + attributes TEXT NOT NULL DEFAULT '[]', location TEXT NOT NULL DEFAULT '', confidence REAL NOT NULL DEFAULT 0.0, status TEXT NOT NULL DEFAULT 'candidate', @@ -128,6 +134,8 @@ CREATE TABLE IF NOT EXISTS repository_scopes ( repository_id INTEGER NOT NULL UNIQUE REFERENCES repositories(id) ON DELETE CASCADE, name TEXT NOT NULL, description TEXT NOT NULL DEFAULT '', + primary_class TEXT NOT NULL DEFAULT 'ability', + attributes TEXT NOT NULL DEFAULT '[]', confidence REAL NOT NULL DEFAULT 1.0, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ); @@ -149,6 +157,8 @@ CREATE TABLE IF NOT EXISTS approved_capabilities ( description TEXT NOT NULL DEFAULT '', inputs TEXT NOT NULL DEFAULT '[]', outputs TEXT NOT NULL DEFAULT '[]', + primary_class TEXT NOT NULL DEFAULT 'capability', + attributes TEXT NOT NULL DEFAULT '[]', confidence REAL NOT NULL DEFAULT 1.0, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ); @@ -159,6 +169,8 @@ CREATE TABLE IF NOT EXISTS approved_features ( capability_id INTEGER NOT NULL REFERENCES approved_capabilities(id) ON DELETE CASCADE, name TEXT NOT NULL, type TEXT NOT NULL, + primary_class TEXT NOT NULL DEFAULT '', + attributes TEXT NOT NULL DEFAULT '[]', location TEXT NOT NULL DEFAULT '', confidence REAL NOT NULL DEFAULT 1.0, source_refs TEXT NOT NULL DEFAULT '[]', diff --git a/src/repo_registry/candidate_graph/generator.py b/src/repo_registry/candidate_graph/generator.py index d22b84a..496a07e 100644 --- a/src/repo_registry/candidate_graph/generator.py +++ b/src/repo_registry/candidate_graph/generator.py @@ -21,6 +21,8 @@ class CandidateFeatureDraft: location: str confidence: float source_refs: list[SourceReference] + primary_class: str = "" + attributes: list[str] = field(default_factory=list) @dataclass(frozen=True) @@ -31,6 +33,8 @@ class CandidateCapabilityDraft: outputs: list[str] confidence: float source_refs: list[SourceReference] + primary_class: str = "capability" + attributes: list[str] = field(default_factory=list) features: list[CandidateFeatureDraft] = field(default_factory=list) evidence: list[CandidateEvidenceDraft] = field(default_factory=list) @@ -41,6 +45,8 @@ class CandidateAbilityDraft: description: str confidence: float source_refs: list[SourceReference] + primary_class: str = "ability" + attributes: list[str] = field(default_factory=list) capabilities: list[CandidateCapabilityDraft] = field(default_factory=list) @@ -68,6 +74,11 @@ class CandidateGraphGenerator: credential_configs = self._facts(facts, "credential_config") provider_registries = self._facts(facts, "provider_registry") fallback_policies = self._facts(facts, "fallback_policy") + ability_primary_class, ability_attributes = self._ability_classification( + repository, + facts, + chunks, + ) ability_sources = docs or manifests or languages ability = CandidateAbilityDraft( @@ -82,6 +93,8 @@ class CandidateGraphGenerator: languages=languages, ), source_refs=self._source_refs(ability_sources), + primary_class=ability_primary_class, + attributes=ability_attributes, capabilities=[], ) @@ -119,6 +132,12 @@ class CandidateGraphGenerator: docs=docs, ), source_refs=self._source_refs(manifests + frameworks + languages), + primary_class="repository-structure", + attributes=self._structure_attributes( + manifests, + frameworks, + languages, + ), evidence=self._evidence(tests, examples, docs), ) ) @@ -129,6 +148,8 @@ class CandidateGraphGenerator: description=ability.description, confidence=ability.confidence, source_refs=ability.source_refs, + primary_class=ability.primary_class, + attributes=ability.attributes, capabilities=capabilities, ) ] @@ -154,6 +175,8 @@ class CandidateGraphGenerator: docs=docs, ), source_refs=self._source_refs(interfaces), + primary_class="interface", + attributes=self._interface_attributes(interfaces), features=features, evidence=self._evidence(tests, examples, docs), ) @@ -181,6 +204,8 @@ class CandidateGraphGenerator: source_refs=self._source_refs( [fact for fact in providers if fact.name == provider] ), + primary_class="integration", + attributes=["llm-provider", provider.lower()], ) for provider in provider_names ] @@ -192,6 +217,8 @@ class CandidateGraphGenerator: location=self._grouped_location(credentials), confidence=0.7, source_refs=self._source_refs(credentials), + primary_class="configuration", + attributes=["credential", "llm-provider"], ) ) if registries: @@ -202,6 +229,8 @@ class CandidateGraphGenerator: location=self._grouped_location(registries), confidence=0.65, source_refs=self._source_refs(registries), + primary_class="backend", + attributes=["provider-registry", "llm-provider"], ) ) if fallback_policies: @@ -212,6 +241,8 @@ class CandidateGraphGenerator: location=self._grouped_location(fallback_policies), confidence=0.6, source_refs=self._source_refs(fallback_policies), + primary_class="backend", + attributes=["fallback-policy", "llm-provider"], ) ) return CandidateCapabilityDraft( @@ -232,6 +263,13 @@ class CandidateGraphGenerator: source_refs=self._source_refs( providers + credentials + registries + fallback_policies ), + primary_class="llm-integration", + attributes=self._llm_provider_attributes( + providers, + credentials, + registries, + fallback_policies, + ), features=features, evidence=self._evidence(tests, examples, docs), ) @@ -256,6 +294,8 @@ class CandidateGraphGenerator: location=fact.path, confidence=0.65 if fact.value else 0.45, source_refs=self._source_refs([fact]), + primary_class=feature_type, + attributes=self._feature_attributes(feature_type, [fact]), ) ) continue @@ -271,6 +311,8 @@ class CandidateGraphGenerator: location=self._grouped_location(facts), confidence=self._grouped_interface_confidence(facts), source_refs=self._source_refs(facts), + primary_class=feature_type, + attributes=self._feature_attributes(feature_type, facts), ) ) return features @@ -357,6 +399,96 @@ class CandidateGraphGenerator: return "API" return "interface" + def _ability_classification( + self, + repository: Repository, + facts: list[ObservedFact], + chunks: list[ContentChunk], + ) -> tuple[str, list[str]]: + text = " ".join( + [ + repository.name, + repository.description or "", + " ".join(chunk.text[:600] for chunk in chunks if chunk.kind == "documentation"), + " ".join(f"{fact.kind} {fact.name} {fact.value}" for fact in facts), + ] + ).lower() + attributes: list[str] = [] + if any(token in text for token in ("repository", "repo", "registry")): + attributes.append("repository") + if any(token in text for token in ("ability", "capability", "feature")): + return "repository-intelligence", self._unique(attributes + ["capability-mapping"]) + if any(token in text for token in ("llm", "openrouter", "claude", "model provider")): + return "ai-integration", self._unique(attributes + ["llm-provider"]) + if any(fact.kind == "interface" for fact in facts): + attributes.append("interface") + return "developer-tooling", self._unique(attributes) + + def _interface_attributes(self, interfaces: list[ObservedFact]) -> list[str]: + feature_types = {self._feature_type(fact) for fact in interfaces} + attributes = ["api" if item == "API" else "cli" if item == "CLI" else "callable" for item in feature_types] + return self._unique(["surface", *attributes]) + + def _feature_attributes( + self, + feature_type: str, + facts: list[ObservedFact], + ) -> list[str]: + attributes = [feature_type] + if feature_type == "API": + attributes.extend(["surface", "http"]) + elif feature_type == "CLI": + attributes.extend(["surface", "command"]) + else: + attributes.append("surface") + paths = " ".join(fact.path.lower() for fact in facts) + if "test" in paths: + attributes.append("test-linked") + return self._unique(attributes) + + def _structure_attributes( + self, + manifests: list[ObservedFact], + frameworks: list[ObservedFact], + languages: list[ObservedFact], + ) -> list[str]: + return self._unique( + [ + "manifest" if manifests else "", + *[fact.name for fact in frameworks], + *[fact.name for fact in languages], + ] + ) + + def _llm_provider_attributes( + self, + providers: list[ObservedFact], + credentials: list[ObservedFact], + registries: list[ObservedFact], + fallback_policies: list[ObservedFact], + ) -> list[str]: + return self._unique( + [ + "llm-provider", + *[fact.name.lower() for fact in providers], + "credential" if credentials else "", + "provider-registry" if registries else "", + "fallback-policy" if fallback_policies else "", + ] + ) + + def _unique(self, values: list[str]) -> list[str]: + result: list[str] = [] + seen: set[str] = set() + for value in values: + item = value.strip() + key = item.lower() + if not item or key in seen: + continue + seen.add(key) + result.append(item) + return result + def _interface_inputs(self, interfaces: list[ObservedFact]) -> list[str]: feature_types = {self._feature_type(fact) for fact in interfaces} inputs: list[str] = [] diff --git a/src/repo_registry/candidate_graph/normalization.py b/src/repo_registry/candidate_graph/normalization.py index bd837cc..f1de815 100644 --- a/src/repo_registry/candidate_graph/normalization.py +++ b/src/repo_registry/candidate_graph/normalization.py @@ -73,6 +73,8 @@ def _combine_abilities( description=_preferred_description(left.description, right.description), confidence=max(left.confidence, right.confidence), source_refs=_merge_source_refs(left.source_refs, right.source_refs), + primary_class=_preferred_text(left.primary_class, right.primary_class), + attributes=_merge_strings(left.attributes, right.attributes), capabilities=_merge_capabilities(left.capabilities + right.capabilities), ) @@ -107,6 +109,8 @@ def _combine_capabilities( outputs=_merge_strings(left.outputs, right.outputs), confidence=max(left.confidence, right.confidence), source_refs=_merge_source_refs(left.source_refs, right.source_refs), + primary_class=_preferred_text(left.primary_class, right.primary_class), + attributes=_merge_strings(left.attributes, right.attributes), features=_merge_features(left.features + right.features), evidence=_merge_evidence(left.evidence + right.evidence), ) @@ -128,6 +132,8 @@ def _merge_features( location=_preferred_text(existing.location, feature.location), confidence=max(existing.confidence, feature.confidence), source_refs=_merge_source_refs(existing.source_refs, feature.source_refs), + primary_class=_preferred_text(existing.primary_class, feature.primary_class), + attributes=_merge_strings(existing.attributes, feature.attributes), ) return merged diff --git a/src/repo_registry/core/models.py b/src/repo_registry/core/models.py index 77b2afc..78148b7 100644 --- a/src/repo_registry/core/models.py +++ b/src/repo_registry/core/models.py @@ -161,6 +161,8 @@ class CandidateFeature: status: str source_refs: list[SourceReference] confidence_label: str = "" + primary_class: str = "" + attributes: list[str] = field(default_factory=list) @dataclass(frozen=True) @@ -174,6 +176,8 @@ class CandidateCapability: status: str source_refs: list[SourceReference] confidence_label: str = "" + primary_class: str = "capability" + attributes: list[str] = field(default_factory=list) features: list[CandidateFeature] = field(default_factory=list) evidence: list[CandidateEvidence] = field(default_factory=list) @@ -187,6 +191,8 @@ class CandidateAbility: status: str source_refs: list[SourceReference] confidence_label: str = "" + primary_class: str = "ability" + attributes: list[str] = field(default_factory=list) capabilities: list[CandidateCapability] = field(default_factory=list) @@ -228,6 +234,8 @@ class Feature: confidence: float confidence_label: str = "" source_refs: list[SourceReference] = field(default_factory=list) + primary_class: str = "" + attributes: list[str] = field(default_factory=list) @dataclass(frozen=True) @@ -239,6 +247,8 @@ class Capability: outputs: list[str] confidence: float confidence_label: str = "" + primary_class: str = "capability" + attributes: list[str] = field(default_factory=list) features: list[Feature] = field(default_factory=list) evidence: list[Evidence] = field(default_factory=list) @@ -250,6 +260,8 @@ class Ability: description: str confidence: float confidence_label: str = "" + primary_class: str = "ability" + attributes: list[str] = field(default_factory=list) capabilities: list[Capability] = field(default_factory=list) diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py index 157e214..0fc0cbd 100644 --- a/src/repo_registry/core/service.py +++ b/src/repo_registry/core/service.py @@ -369,6 +369,8 @@ class RegistryService: location=feature.location, confidence=feature.confidence, source_refs=feature.source_refs, + primary_class=feature.primary_class, + attributes=feature.attributes, ) for evidence in capability.evidence: if evidence.status != "candidate": @@ -512,6 +514,8 @@ class RegistryService: location=feature.location, confidence=feature.confidence, source_refs=feature.source_refs, + primary_class=feature.primary_class, + attributes=feature.attributes, ) self.store.mark_candidate_feature_status( repository_id, @@ -655,6 +659,8 @@ class RegistryService: inputs=capability.inputs, outputs=capability.outputs, confidence=capability.confidence, + primary_class=capability.primary_class, + attributes=capability.attributes, ) for feature in capability.features: if feature.status != "candidate": @@ -667,6 +673,8 @@ class RegistryService: location=feature.location, confidence=feature.confidence, source_refs=feature.source_refs, + primary_class=feature.primary_class, + attributes=feature.attributes, ) for evidence in capability.evidence: if evidence.status != "candidate": @@ -702,6 +710,8 @@ class RegistryService: name=candidate_ability.name, description=candidate_ability.description, confidence=candidate_ability.confidence, + primary_class=candidate_ability.primary_class, + attributes=candidate_ability.attributes, ) def _ensure_approved_capability( @@ -726,6 +736,8 @@ class RegistryService: inputs=candidate_capability.inputs, outputs=candidate_capability.outputs, confidence=candidate_capability.confidence, + primary_class=candidate_capability.primary_class, + attributes=candidate_capability.attributes, ) def _candidate_capability_with_parent( @@ -884,6 +896,8 @@ class RegistryService: name: str, description: str, confidence: float, + primary_class: str = "ability", + attributes: Sequence[str] = (), notes: str = "", ) -> CandidateGraph: self.store.update_candidate_ability( @@ -893,6 +907,8 @@ class RegistryService: name=name, description=description, confidence=confidence, + primary_class=primary_class, + attributes=list(attributes), ) self.store.create_review_decision( repository_id, @@ -912,6 +928,8 @@ class RegistryService: name: str, description: str, confidence: float, + primary_class: str = "capability", + attributes: Sequence[str] = (), notes: str = "", ) -> CandidateGraph: self.store.update_candidate_capability( @@ -921,6 +939,8 @@ class RegistryService: name=name, description=description, confidence=confidence, + primary_class=primary_class, + attributes=list(attributes), ) self.store.create_review_decision( repository_id, @@ -931,6 +951,40 @@ class RegistryService: self.store.update_repository_status(repository_id, "reviewing") return self.store.get_candidate_graph(repository_id, analysis_run_id) + def edit_candidate_feature( + self, + repository_id: int, + analysis_run_id: int, + candidate_feature_id: int, + *, + name: str, + type: str, + location: str, + confidence: float, + primary_class: str | None = None, + attributes: Sequence[str] = (), + notes: str = "", + ) -> CandidateGraph: + self.store.update_candidate_feature( + repository_id, + analysis_run_id, + candidate_feature_id, + name=name, + type=type, + location=location, + confidence=confidence, + primary_class=primary_class, + attributes=list(attributes), + ) + self.store.create_review_decision( + repository_id, + analysis_run_id, + action="edit_candidate_feature", + notes=notes, + ) + self.store.update_repository_status(repository_id, "reviewing") + return self.store.get_candidate_graph(repository_id, analysis_run_id) + def relink_candidate_capability( self, repository_id: int, @@ -1106,6 +1160,8 @@ class RegistryService: name: str, description: str = "", confidence: float = 1.0, + primary_class: str = "ability", + attributes: Sequence[str] = (), ) -> int: self.store.get_repository(repository_id) return self.store.create_ability( @@ -1113,6 +1169,8 @@ class RegistryService: name=name, description=description, confidence=confidence, + primary_class=primary_class, + attributes=list(attributes), ) def update_ability( @@ -1123,6 +1181,8 @@ class RegistryService: name: str | None = None, description: str | None = None, confidence: float | None = None, + primary_class: str | None = None, + attributes: Sequence[str] | None = None, ) -> RepositoryAbilityMap: self.store.update_ability( repository_id, @@ -1130,6 +1190,8 @@ class RegistryService: name=name, description=description, confidence=confidence, + primary_class=primary_class, + attributes=list(attributes) if attributes is not None else None, ) return self.store.get_ability_map(repository_id) @@ -1151,6 +1213,8 @@ class RegistryService: inputs: Sequence[str] = (), outputs: Sequence[str] = (), confidence: float = 1.0, + primary_class: str = "capability", + attributes: Sequence[str] = (), ) -> int: self.store.ensure_ability(repository_id, ability_id) return self.store.create_capability( @@ -1161,6 +1225,8 @@ class RegistryService: inputs=list(inputs), outputs=list(outputs), confidence=confidence, + primary_class=primary_class, + attributes=list(attributes), ) def update_capability( @@ -1173,6 +1239,8 @@ class RegistryService: inputs: Sequence[str] | None = None, outputs: Sequence[str] | None = None, confidence: float | None = None, + primary_class: str | None = None, + attributes: Sequence[str] | None = None, ) -> RepositoryAbilityMap: self.store.update_capability( repository_id, @@ -1182,6 +1250,8 @@ class RegistryService: inputs=list(inputs) if inputs is not None else None, outputs=list(outputs) if outputs is not None else None, confidence=confidence, + primary_class=primary_class, + attributes=list(attributes) if attributes is not None else None, ) return self.store.get_ability_map(repository_id) @@ -1202,6 +1272,8 @@ class RegistryService: type: str, location: str = "", confidence: float = 1.0, + primary_class: str | None = None, + attributes: Sequence[str] = (), ) -> int: self.store.ensure_capability(repository_id, capability_id) return self.store.create_feature( @@ -1211,6 +1283,8 @@ class RegistryService: type=type, location=location, confidence=confidence, + primary_class=primary_class, + attributes=list(attributes), ) def update_feature( @@ -1222,6 +1296,8 @@ class RegistryService: type: str | None = None, location: str | None = None, confidence: float | None = None, + primary_class: str | None = None, + attributes: Sequence[str] | None = None, ) -> RepositoryAbilityMap: self.store.update_feature( repository_id, @@ -1230,6 +1306,8 @@ class RegistryService: type=type, location=location, confidence=confidence, + primary_class=primary_class, + attributes=list(attributes) if attributes is not None else None, ) return self.store.get_ability_map(repository_id) diff --git a/src/repo_registry/storage/sqlite.py b/src/repo_registry/storage/sqlite.py index 9437b6a..e099368 100644 --- a/src/repo_registry/storage/sqlite.py +++ b/src/repo_registry/storage/sqlite.py @@ -51,6 +51,7 @@ class RegistryStore: self._ensure_repository_scopes_table(connection) self._ensure_approved_source_ref_columns(connection) self._ensure_evidence_relationship_columns(connection) + self._ensure_characteristic_classification_columns(connection) self._ensure_expectation_gaps_table(connection) def connect(self) -> sqlite3.Connection: @@ -108,6 +109,60 @@ class RegistryStore: """ ) + def _ensure_characteristic_classification_columns( + self, + connection: sqlite3.Connection, + ) -> None: + defaults = { + "candidate_abilities": "ability", + "approved_abilities": "ability", + "candidate_capabilities": "capability", + "approved_capabilities": "capability", + "candidate_features": "", + "approved_features": "", + } + for table, default_class in defaults.items(): + columns = { + row["name"] + for row in connection.execute(f"PRAGMA table_info({table})").fetchall() + } + if "primary_class" not in columns: + connection.execute( + f"ALTER TABLE {table} ADD COLUMN primary_class TEXT NOT NULL DEFAULT '{default_class}'" + ) + if "attributes" not in columns: + connection.execute( + f"ALTER TABLE {table} ADD COLUMN attributes TEXT NOT NULL DEFAULT '[]'" + ) + + for table in ("candidate_abilities", "approved_abilities"): + connection.execute( + f""" + UPDATE {table} + SET primary_class = COALESCE(NULLIF(primary_class, ''), 'ability'), + attributes = COALESCE(NULLIF(attributes, ''), '[]') + WHERE primary_class = '' OR attributes = '' + """ + ) + for table in ("candidate_capabilities", "approved_capabilities"): + connection.execute( + f""" + UPDATE {table} + SET primary_class = COALESCE(NULLIF(primary_class, ''), 'capability'), + attributes = COALESCE(NULLIF(attributes, ''), '[]') + WHERE primary_class = '' OR attributes = '' + """ + ) + for table in ("candidate_features", "approved_features"): + connection.execute( + f""" + UPDATE {table} + SET primary_class = COALESCE(NULLIF(primary_class, ''), type), + attributes = COALESCE(NULLIF(attributes, ''), json_array(type)) + WHERE primary_class = '' OR attributes = '' + """ + ) + def _ensure_content_chunks_table(self, connection: sqlite3.Connection) -> None: connection.execute( """ @@ -361,14 +416,17 @@ class RegistryStore: ability_cursor = connection.execute( """ INSERT INTO candidate_abilities - (repository_id, analysis_run_id, name, description, confidence, source_refs) - VALUES (?, ?, ?, ?, ?, ?) + (repository_id, analysis_run_id, name, description, primary_class, + attributes, confidence, source_refs) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, ( repository_id, analysis_run_id, ability.name, ability.description, + ability.primary_class or "ability", + self._attributes_to_json(ability.attributes), ability.confidence, self._source_refs_to_json(ability.source_refs), ), @@ -379,8 +437,8 @@ class RegistryStore: """ INSERT INTO candidate_capabilities (repository_id, analysis_run_id, ability_id, name, description, - inputs, outputs, confidence, source_refs) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + inputs, outputs, primary_class, attributes, confidence, source_refs) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repository_id, @@ -390,6 +448,8 @@ class RegistryStore: capability.description, json.dumps(capability.inputs), json.dumps(capability.outputs), + capability.primary_class or "capability", + self._attributes_to_json(capability.attributes), capability.confidence, self._source_refs_to_json(capability.source_refs), ), @@ -400,8 +460,8 @@ class RegistryStore: """ INSERT INTO candidate_features (repository_id, analysis_run_id, capability_id, name, type, - location, confidence, source_refs) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) + primary_class, attributes, location, confidence, source_refs) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repository_id, @@ -409,6 +469,10 @@ class RegistryStore: capability_id, feature.name, feature.type, + feature.primary_class or feature.type, + self._attributes_to_json( + feature.attributes or [feature.type] + ), feature.location, feature.confidence, self._source_refs_to_json(feature.source_refs), @@ -448,7 +512,8 @@ class RegistryStore: with self.connect() as connection: ability_rows = connection.execute( """ - SELECT id, name, description, confidence, status, source_refs + SELECT id, name, description, primary_class, attributes, confidence, + status, source_refs FROM candidate_abilities WHERE repository_id = ? AND analysis_run_id = ? ORDER BY id @@ -458,7 +523,7 @@ class RegistryStore: capability_rows = connection.execute( """ SELECT id, ability_id, name, description, inputs, outputs, - confidence, status, source_refs + primary_class, attributes, confidence, status, source_refs FROM candidate_capabilities WHERE repository_id = ? AND analysis_run_id = ? ORDER BY id @@ -467,8 +532,8 @@ class RegistryStore: ).fetchall() feature_rows = connection.execute( """ - SELECT id, capability_id, name, type, location, confidence, - status, source_refs + SELECT id, capability_id, name, type, primary_class, attributes, + location, confidence, status, source_refs FROM candidate_features WHERE repository_id = ? AND analysis_run_id = ? ORDER BY id @@ -498,6 +563,8 @@ class RegistryStore: status=row["status"], source_refs=self._source_refs_from_json(row["source_refs"]), confidence_label=confidence_label(row["confidence"]), + primary_class=row["primary_class"] or row["type"], + attributes=self._attributes_from_json(row["attributes"]), ) ) @@ -531,6 +598,8 @@ class RegistryStore: status=row["status"], source_refs=self._source_refs_from_json(row["source_refs"]), confidence_label=confidence_label(row["confidence"]), + primary_class=row["primary_class"] or "capability", + attributes=self._attributes_from_json(row["attributes"]), features=features_by_capability.get(row["id"], []), evidence=evidence_by_capability.get(row["id"], []), ) @@ -545,6 +614,8 @@ class RegistryStore: status=row["status"], source_refs=self._source_refs_from_json(row["source_refs"]), confidence_label=confidence_label(row["confidence"]), + primary_class=row["primary_class"] or "ability", + attributes=self._attributes_from_json(row["attributes"]), capabilities=capabilities_by_ability.get(row["id"], []), ) for row in ability_rows @@ -861,17 +932,22 @@ class RegistryStore: name: str, description: str, confidence: float, + primary_class: str = "ability", + attributes: list[str] | None = None, ) -> None: with self.connect() as connection: cursor = connection.execute( """ UPDATE candidate_abilities - SET name = ?, description = ?, confidence = ? + SET name = ?, description = ?, primary_class = ?, attributes = ?, + confidence = ? WHERE id = ? AND repository_id = ? AND analysis_run_id = ? """, ( name, description, + primary_class or "ability", + self._attributes_to_json(attributes or []), confidence, candidate_ability_id, repository_id, @@ -894,17 +970,22 @@ class RegistryStore: name: str, description: str, confidence: float, + primary_class: str = "capability", + attributes: list[str] | None = None, ) -> None: with self.connect() as connection: cursor = connection.execute( """ UPDATE candidate_capabilities - SET name = ?, description = ?, confidence = ? + SET name = ?, description = ?, primary_class = ?, attributes = ?, + confidence = ? WHERE id = ? AND repository_id = ? AND analysis_run_id = ? """, ( name, description, + primary_class or "capability", + self._attributes_to_json(attributes or []), confidence, candidate_capability_id, repository_id, @@ -918,6 +999,46 @@ class RegistryStore: f"{repository_id} analysis run {analysis_run_id}" ) + def update_candidate_feature( + self, + repository_id: int, + analysis_run_id: int, + candidate_feature_id: int, + *, + name: str, + type: str, + location: str, + confidence: float, + primary_class: str | None = None, + attributes: list[str] | None = None, + ) -> None: + with self.connect() as connection: + cursor = connection.execute( + """ + UPDATE candidate_features + SET name = ?, type = ?, primary_class = ?, attributes = ?, + location = ?, confidence = ? + WHERE id = ? AND repository_id = ? AND analysis_run_id = ? + """, + ( + name, + type, + primary_class or type, + self._attributes_to_json(attributes or [type]), + location, + confidence, + candidate_feature_id, + repository_id, + analysis_run_id, + ), + ) + if cursor.rowcount == 0: + raise NotFoundError( + "candidate feature " + f"{candidate_feature_id} was not found for repository " + f"{repository_id} analysis run {analysis_run_id}" + ) + def relink_candidate_capability( self, repository_id: int, @@ -1604,15 +1725,24 @@ class RegistryStore: name: str, description: str, confidence: float, + primary_class: str = "ability", + attributes: list[str] | None = None, ) -> int: with self.connect() as connection: cursor = connection.execute( """ INSERT INTO approved_abilities - (repository_id, name, description, confidence) - VALUES (?, ?, ?, ?) + (repository_id, name, description, primary_class, attributes, confidence) + VALUES (?, ?, ?, ?, ?, ?) """, - (repository_id, name, description, confidence), + ( + repository_id, + name, + description, + primary_class or "ability", + self._attributes_to_json(attributes or []), + confidence, + ), ) return int(cursor.lastrowid) @@ -1638,6 +1768,8 @@ class RegistryStore: name: str | None = None, description: str | None = None, confidence: float | None = None, + primary_class: str | None = None, + attributes: list[str] | None = None, ) -> None: self._update_approved_row( table="approved_abilities", @@ -1648,6 +1780,12 @@ class RegistryStore: "name": name, "description": description, "confidence": confidence, + "primary_class": primary_class, + "attributes": ( + self._attributes_to_json(attributes) + if attributes is not None + else None + ), }, ) @@ -1669,13 +1807,16 @@ class RegistryStore: inputs: list[str], outputs: list[str], confidence: float, + primary_class: str = "capability", + attributes: list[str] | None = None, ) -> int: with self.connect() as connection: cursor = connection.execute( """ INSERT INTO approved_capabilities - (repository_id, ability_id, name, description, inputs, outputs, confidence) - VALUES (?, ?, ?, ?, ?, ?, ?) + (repository_id, ability_id, name, description, inputs, outputs, + primary_class, attributes, confidence) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repository_id, @@ -1684,6 +1825,8 @@ class RegistryStore: description, json.dumps(inputs), json.dumps(outputs), + primary_class or "capability", + self._attributes_to_json(attributes or []), confidence, ), ) @@ -1713,6 +1856,8 @@ class RegistryStore: inputs: list[str] | None = None, outputs: list[str] | None = None, confidence: float | None = None, + primary_class: str | None = None, + attributes: list[str] | None = None, ) -> None: self._update_approved_row( table="approved_capabilities", @@ -1725,6 +1870,12 @@ class RegistryStore: "inputs": json.dumps(inputs) if inputs is not None else None, "outputs": json.dumps(outputs) if outputs is not None else None, "confidence": confidence, + "primary_class": primary_class, + "attributes": ( + self._attributes_to_json(attributes) + if attributes is not None + else None + ), }, ) @@ -1746,19 +1897,24 @@ class RegistryStore: location: str, confidence: float, source_refs: list[SourceReference] | None = None, + primary_class: str | None = None, + attributes: list[str] | None = None, ) -> int: with self.connect() as connection: cursor = connection.execute( """ INSERT INTO approved_features - (repository_id, capability_id, name, type, location, confidence, source_refs) - VALUES (?, ?, ?, ?, ?, ?, ?) + (repository_id, capability_id, name, type, primary_class, attributes, + location, confidence, source_refs) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repository_id, capability_id, name, type, + primary_class or type, + self._attributes_to_json(attributes or [type]), location, confidence, self._source_refs_to_json(source_refs or []), @@ -1775,6 +1931,8 @@ class RegistryStore: type: str | None = None, location: str | None = None, confidence: float | None = None, + primary_class: str | None = None, + attributes: list[str] | None = None, ) -> None: self._update_approved_row( table="approved_features", @@ -1784,6 +1942,12 @@ class RegistryStore: values={ "name": name, "type": type, + "primary_class": primary_class, + "attributes": ( + self._attributes_to_json(attributes) + if attributes is not None + else None + ), "location": location, "confidence": confidence, }, @@ -1968,13 +2132,15 @@ class RegistryStore: ability_cursor = connection.execute( """ INSERT INTO approved_abilities - (repository_id, name, description, confidence) - VALUES (?, ?, ?, ?) + (repository_id, name, description, primary_class, attributes, confidence) + VALUES (?, ?, ?, ?, ?, ?) """, ( repository_id, ability.name, ability.description, + ability.primary_class or "ability", + self._attributes_to_json(ability.attributes), ability.confidence, ), ) @@ -1986,8 +2152,8 @@ class RegistryStore: """ INSERT INTO approved_capabilities (repository_id, ability_id, name, description, inputs, outputs, - confidence) - VALUES (?, ?, ?, ?, ?, ?, ?) + primary_class, attributes, confidence) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repository_id, @@ -1996,6 +2162,8 @@ class RegistryStore: capability.description, json.dumps(capability.inputs), json.dumps(capability.outputs), + capability.primary_class or "capability", + self._attributes_to_json(capability.attributes), capability.confidence, ), ) @@ -2006,15 +2174,19 @@ class RegistryStore: connection.execute( """ INSERT INTO approved_features - (repository_id, capability_id, name, type, location, - confidence, source_refs) - VALUES (?, ?, ?, ?, ?, ?, ?) + (repository_id, capability_id, name, type, primary_class, + attributes, location, confidence, source_refs) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repository_id, approved_capability_id, feature.name, feature.type, + feature.primary_class or feature.type, + self._attributes_to_json( + feature.attributes or [feature.type] + ), feature.location, feature.confidence, self._source_refs_to_json(feature.source_refs), @@ -2051,7 +2223,7 @@ class RegistryStore: with self.connect() as connection: ability_rows = connection.execute( """ - SELECT id, name, description, confidence + SELECT id, name, description, primary_class, attributes, confidence FROM approved_abilities WHERE repository_id = ? ORDER BY id @@ -2060,7 +2232,8 @@ class RegistryStore: ).fetchall() capability_rows = connection.execute( """ - SELECT id, ability_id, name, description, inputs, outputs, confidence + SELECT id, ability_id, name, description, inputs, outputs, + primary_class, attributes, confidence FROM approved_capabilities WHERE repository_id = ? ORDER BY id @@ -2069,7 +2242,8 @@ class RegistryStore: ).fetchall() feature_rows = connection.execute( """ - SELECT id, capability_id, name, type, location, confidence, source_refs + SELECT id, capability_id, name, type, primary_class, attributes, + location, confidence, source_refs FROM approved_features WHERE repository_id = ? ORDER BY id @@ -2098,6 +2272,8 @@ class RegistryStore: confidence=row["confidence"], confidence_label=confidence_label(row["confidence"]), source_refs=self._source_refs_from_json(row["source_refs"]), + primary_class=row["primary_class"] or row["type"], + attributes=self._attributes_from_json(row["attributes"]), ) ) @@ -2128,6 +2304,8 @@ class RegistryStore: outputs=json.loads(row["outputs"]), confidence=row["confidence"], confidence_label=confidence_label(row["confidence"]), + primary_class=row["primary_class"] or "capability", + attributes=self._attributes_from_json(row["attributes"]), features=features_by_capability.get(row["id"], []), evidence=evidence_by_capability.get(row["id"], []), ) @@ -2140,6 +2318,8 @@ class RegistryStore: description=row["description"], confidence=row["confidence"], confidence_label=confidence_label(row["confidence"]), + primary_class=row["primary_class"] or "ability", + attributes=self._attributes_from_json(row["attributes"]), capabilities=capabilities_by_ability.get(row["id"], []), ) for row in ability_rows @@ -2578,6 +2758,17 @@ class RegistryStore: ] ) + def _attributes_to_json(self, attributes: list[str]) -> str: + return json.dumps([item.strip() for item in attributes if item.strip()]) + + def _attributes_from_json(self, value: str) -> list[str]: + if not value: + return [] + parsed = json.loads(value) + if not isinstance(parsed, list): + return [] + return [str(item) for item in parsed if str(item).strip()] + def _source_refs_from_json(self, value: str) -> list[SourceReference]: return [ SourceReference( diff --git a/src/repo_registry/web_api/schemas.py b/src/repo_registry/web_api/schemas.py index 792775d..0561ab8 100644 --- a/src/repo_registry/web_api/schemas.py +++ b/src/repo_registry/web_api/schemas.py @@ -495,6 +495,8 @@ class CandidateFeatureResponse(BaseModel): id: int name: str type: str + primary_class: str + attributes: list[str] location: str confidence: float status: str @@ -508,6 +510,8 @@ class CandidateCapabilityResponse(BaseModel): description: str inputs: list[str] outputs: list[str] + primary_class: str + attributes: list[str] confidence: float status: str source_refs: list[SourceReferenceResponse] @@ -520,6 +524,8 @@ class CandidateAbilityResponse(BaseModel): id: int name: str description: str + primary_class: str + attributes: list[str] confidence: float status: str source_refs: list[SourceReferenceResponse] @@ -689,6 +695,8 @@ class FeatureResponse(BaseModel): id: int name: str type: str + primary_class: str + attributes: list[str] location: str confidence: float confidence_label: str @@ -701,6 +709,8 @@ class CapabilityResponse(BaseModel): description: str inputs: list[str] outputs: list[str] + primary_class: str + attributes: list[str] confidence: float confidence_label: str features: list[FeatureResponse] @@ -711,6 +721,8 @@ class AbilityResponse(BaseModel): id: int name: str description: str + primary_class: str + attributes: list[str] confidence: float confidence_label: str capabilities: list[CapabilityResponse] diff --git a/src/repo_registry/web_ui/views.py b/src/repo_registry/web_ui/views.py index 70ee465..ade3674 100644 --- a/src/repo_registry/web_ui/views.py +++ b/src/repo_registry/web_ui/views.py @@ -719,6 +719,8 @@ def repository_detail(