generated from coulomb/repo-seed
Add discovery connector follow-up
This commit is contained in:
@@ -131,6 +131,52 @@ is present in the current scan and has `mode: replacement`. Missing candidates
|
||||
from additive scopes, such as broad LLM evidence bundles, are left alone.
|
||||
Existing tombstones are preserved so repeated scans can explain graph drift.
|
||||
|
||||
## Connector Follow-Up
|
||||
|
||||
Connector follow-up is explicit and separated from repo-local extraction:
|
||||
|
||||
```bash
|
||||
railiance-fabric scan . \
|
||||
--repo-slug railiance-fabric \
|
||||
--connector local-fabric-registry \
|
||||
--connector-manifest registry/local-repos.yaml \
|
||||
--dry-run
|
||||
```
|
||||
|
||||
The connector interface has slots for:
|
||||
|
||||
- package registries
|
||||
- container registries
|
||||
- API catalogs
|
||||
- service catalogs
|
||||
- deployment inventories
|
||||
- existing Fabric registry data
|
||||
|
||||
The first implementation is `local-fabric-registry`, an offline-safe connector
|
||||
that reads a local onboarding manifest such as `registry/local-repos.yaml`. It
|
||||
adds a `FabricRegistryEntry` candidate, a `cataloged_as` edge from the
|
||||
repository node, and registry-sourced attributes such as domain, remote URL,
|
||||
default branch, State Hub repo id, and declaration paths.
|
||||
|
||||
Connector evidence uses its own replacement scope with source kind
|
||||
`fabric_registry`, so rescans can replace catalog facts without retiring
|
||||
repo-local evidence. Connector run metadata is recorded under `connector_runs`
|
||||
with status, source, message, and candidate counts.
|
||||
|
||||
Connector-derived facts should be treated this way:
|
||||
|
||||
- accepted: only when the connector reads explicit repo-owned declarations or a
|
||||
catalog already governed as authoritative for that field
|
||||
- candidate: stable local registry facts such as onboarding manifest entries,
|
||||
declared remote URLs, State Hub ids, and declaration paths
|
||||
- review-only: missing catalogs, rate limits, connector failures, ambiguous
|
||||
matches, or facts from catalogs with unclear ownership
|
||||
|
||||
Failures do not corrupt the scan. Missing catalogs become
|
||||
`connector_unavailable` review artifacts, malformed catalogs become
|
||||
`connector_failed` artifacts, and future remote connectors should use
|
||||
`connector_rate_limited` when backoff is required.
|
||||
|
||||
## Identity
|
||||
|
||||
Identity is the main safety boundary. The scanner must not append guesses on
|
||||
|
||||
@@ -10,6 +10,7 @@ import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from .connectors import ConnectorConfig
|
||||
from .loader import declaration_files, load_yaml
|
||||
from .graph import FabricGraph, build_graph
|
||||
from .graph_explorer import fabric_graph_explorer_payload
|
||||
@@ -82,6 +83,19 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
scan.add_argument("--llm-temperature", type=float, default=0.0)
|
||||
scan.add_argument("--llm-max-tokens", type=int, default=1500)
|
||||
scan.add_argument("--llm-min-confidence", type=float, default=0.6)
|
||||
scan.add_argument(
|
||||
"--connector",
|
||||
action="append",
|
||||
choices=["local-fabric-registry"],
|
||||
default=[],
|
||||
help="Enable a discovery connector. May be passed more than once.",
|
||||
)
|
||||
scan.add_argument(
|
||||
"--connector-manifest",
|
||||
type=Path,
|
||||
default=Path("registry/local-repos.yaml"),
|
||||
help="Manifest path for the local-fabric-registry connector.",
|
||||
)
|
||||
|
||||
registry = sub.add_parser("registry", help="Feed a running Railiance Fabric registry service.")
|
||||
registry_sub = registry.add_subparsers(dest="registry_command", required=True)
|
||||
@@ -410,6 +424,14 @@ def _scan_repo(args: argparse.Namespace) -> int:
|
||||
max_tokens=args.llm_max_tokens,
|
||||
min_confidence=args.llm_min_confidence,
|
||||
),
|
||||
connectors=[
|
||||
ConnectorConfig(
|
||||
connector_id=connector_id,
|
||||
connector_type="fabric_registry",
|
||||
source_path=str(args.connector_manifest),
|
||||
)
|
||||
for connector_id in args.connector
|
||||
],
|
||||
)
|
||||
)
|
||||
if args.previous_snapshot:
|
||||
@@ -433,7 +455,10 @@ def _scan_repo(args: argparse.Namespace) -> int:
|
||||
candidates = snapshot["candidates"]
|
||||
review_count = len(snapshot.get("review_artifacts", []))
|
||||
review_summary = f", {review_count} review artifact(s)" if review_count else ""
|
||||
diff = snapshot.get("reconciliation", {}).get("diff", {})
|
||||
connector_count = len(snapshot.get("connector_runs", []))
|
||||
connector_summary = f", {connector_count} connector run(s)" if connector_count else ""
|
||||
reconciliation = snapshot.get("reconciliation", {})
|
||||
diff = reconciliation.get("diff") if isinstance(reconciliation, dict) else None
|
||||
diff_summary = ""
|
||||
if isinstance(diff, dict):
|
||||
diff_summary = (
|
||||
@@ -451,6 +476,7 @@ def _scan_repo(args: argparse.Namespace) -> int:
|
||||
f"{len(candidates['attributes'])} attribute(s), "
|
||||
f"{len(snapshot['replacement_scopes'])} replacement scope(s)"
|
||||
f"{review_summary}"
|
||||
f"{connector_summary}"
|
||||
f"{diff_summary}"
|
||||
)
|
||||
if args.output:
|
||||
|
||||
491
railiance_fabric/connectors.py
Normal file
491
railiance_fabric/connectors.py
Normal file
@@ -0,0 +1,491 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Protocol
|
||||
|
||||
from .discovery import (
|
||||
attribute_stable_key,
|
||||
discovery_stable_key,
|
||||
relationship_stable_key,
|
||||
replacement_scope_id,
|
||||
short_fingerprint,
|
||||
source_fingerprint,
|
||||
)
|
||||
from .loader import load_yaml
|
||||
|
||||
|
||||
CONNECTOR_TYPES = {
|
||||
"package_registry": "Package registry metadata such as package descriptions, versions, and ownership.",
|
||||
"container_registry": "Container image metadata such as image names, tags, digests, and labels.",
|
||||
"api_catalog": "API catalog metadata such as OpenAPI/AsyncAPI records and ownership.",
|
||||
"service_catalog": "Service catalog metadata such as service ownership, lifecycle, and dependencies.",
|
||||
"deployment_inventory": "Deployment inventory metadata such as environments, workloads, and runtime placement.",
|
||||
"fabric_registry": "Existing Railiance Fabric registry or onboarding manifest data.",
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ConnectorConfig:
|
||||
connector_id: str
|
||||
connector_type: str
|
||||
source_path: str | None = None
|
||||
enabled: bool = True
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ConnectorContext:
|
||||
repo_path: Path
|
||||
repo_slug: str
|
||||
snapshot: dict[str, Any]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ConnectorResult:
|
||||
connector_run: dict[str, object]
|
||||
replacement_scopes: list[dict[str, object]]
|
||||
candidates: dict[str, list[dict[str, object]]]
|
||||
review_artifacts: list[dict[str, object]]
|
||||
|
||||
|
||||
class DiscoveryConnector(Protocol):
|
||||
connector_id: str
|
||||
connector_type: str
|
||||
|
||||
def collect(self, context: ConnectorContext) -> ConnectorResult:
|
||||
"""Collect connector evidence without mutating ``context.snapshot``."""
|
||||
|
||||
|
||||
class PackageRegistryConnector(DiscoveryConnector, Protocol):
|
||||
pass
|
||||
|
||||
|
||||
class ContainerRegistryConnector(DiscoveryConnector, Protocol):
|
||||
pass
|
||||
|
||||
|
||||
class ApiCatalogConnector(DiscoveryConnector, Protocol):
|
||||
pass
|
||||
|
||||
|
||||
class ServiceCatalogConnector(DiscoveryConnector, Protocol):
|
||||
pass
|
||||
|
||||
|
||||
class DeploymentInventoryConnector(DiscoveryConnector, Protocol):
|
||||
pass
|
||||
|
||||
|
||||
class FabricRegistryConnector(DiscoveryConnector, Protocol):
|
||||
pass
|
||||
|
||||
|
||||
class LocalFabricRegistryConnector:
|
||||
connector_id = "local-fabric-registry"
|
||||
connector_type = "fabric_registry"
|
||||
|
||||
def __init__(self, manifest_path: Path) -> None:
|
||||
self.manifest_path = manifest_path
|
||||
|
||||
def collect(self, context: ConnectorContext) -> ConnectorResult:
|
||||
started_at = _utc_now()
|
||||
source = str(self.manifest_path)
|
||||
if not self.manifest_path.is_file():
|
||||
return _connector_result(
|
||||
connector_id=self.connector_id,
|
||||
connector_type=self.connector_type,
|
||||
status="unavailable",
|
||||
source=source,
|
||||
message=f"manifest not found: {self.manifest_path}",
|
||||
started_at=started_at,
|
||||
review_artifacts=[
|
||||
_review_artifact(
|
||||
artifact_type="connector_unavailable",
|
||||
origin="registry",
|
||||
message=f"Local Fabric registry manifest not found: {self.manifest_path}",
|
||||
payload={"source": source},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
try:
|
||||
manifest = load_yaml(self.manifest_path)
|
||||
except Exception as exc:
|
||||
return _connector_result(
|
||||
connector_id=self.connector_id,
|
||||
connector_type=self.connector_type,
|
||||
status="failed",
|
||||
source=source,
|
||||
message=str(exc),
|
||||
started_at=started_at,
|
||||
review_artifacts=[
|
||||
_review_artifact(
|
||||
artifact_type="connector_failed",
|
||||
origin="registry",
|
||||
message=f"Cannot load Local Fabric registry manifest: {exc}",
|
||||
payload={"source": source},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
repositories = manifest.get("repositories") if isinstance(manifest, dict) else None
|
||||
if not isinstance(repositories, list):
|
||||
return _connector_result(
|
||||
connector_id=self.connector_id,
|
||||
connector_type=self.connector_type,
|
||||
status="failed",
|
||||
source=source,
|
||||
message="manifest requires a repositories list",
|
||||
started_at=started_at,
|
||||
review_artifacts=[
|
||||
_review_artifact(
|
||||
artifact_type="connector_failed",
|
||||
origin="registry",
|
||||
message="Local Fabric registry manifest requires a repositories list.",
|
||||
payload={"source": source},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
match_index, match = _manifest_repo(repositories, context.repo_slug)
|
||||
if match is None:
|
||||
return _connector_result(
|
||||
connector_id=self.connector_id,
|
||||
connector_type=self.connector_type,
|
||||
status="skipped",
|
||||
source=source,
|
||||
message=f"repo {context.repo_slug!r} not present in manifest",
|
||||
started_at=started_at,
|
||||
)
|
||||
|
||||
rel_source = _display_path(self.manifest_path, context.repo_path)
|
||||
scope = {
|
||||
"id": replacement_scope_id(
|
||||
context.repo_slug,
|
||||
self.connector_id,
|
||||
"fabric_registry",
|
||||
source_path=rel_source,
|
||||
),
|
||||
"extractor_id": self.connector_id,
|
||||
"source_kind": "fabric_registry",
|
||||
"source_path": rel_source,
|
||||
"mode": "replacement",
|
||||
"description": "Local Fabric registry onboarding manifest evidence.",
|
||||
}
|
||||
anchor = _source_anchor("fabric_registry", rel_source, json_pointer=f"/repositories/{match_index}")
|
||||
provenance = {
|
||||
"extractor_id": self.connector_id,
|
||||
"extractor_version": "0.1.0",
|
||||
"method": "connector",
|
||||
"origin": "registry",
|
||||
}
|
||||
repo_key = _repository_key(context.snapshot, context.repo_slug)
|
||||
entry_key = discovery_stable_key(context.repo_slug, "FabricRegistryEntry", context.repo_slug)
|
||||
node = {
|
||||
"stable_key": entry_key,
|
||||
"kind": "FabricRegistryEntry",
|
||||
"label": str(match.get("name") or context.repo_slug),
|
||||
"repo": context.repo_slug,
|
||||
"domain": str(match.get("domain") or ""),
|
||||
"aliases": _unique_strings([context.repo_slug, match.get("name")]),
|
||||
"attributes": {
|
||||
"registry_slug": context.repo_slug,
|
||||
"registry_name": match.get("name") or "",
|
||||
"registry_domain": match.get("domain") or "",
|
||||
"registry_path": match.get("path") or "",
|
||||
"registry_remote_url": match.get("remote_url") or "",
|
||||
"registry_default_branch": match.get("default_branch") or "",
|
||||
"state_hub_repo_id": match.get("state_hub_repo_id") or "",
|
||||
"declaration_paths": match.get("declaration_paths") if isinstance(match.get("declaration_paths"), list) else [],
|
||||
},
|
||||
"origin": "registry",
|
||||
"review_state": "candidate",
|
||||
"status": "active",
|
||||
"confidence": 0.9,
|
||||
"replacement_scope": scope["id"],
|
||||
"provenance": [provenance],
|
||||
"source_anchors": [anchor],
|
||||
}
|
||||
edge = {
|
||||
"stable_key": relationship_stable_key(repo_key, "cataloged_as", entry_key, evidence_scope=scope["id"]),
|
||||
"edge_type": "cataloged_as",
|
||||
"source_key": repo_key,
|
||||
"target_key": entry_key,
|
||||
"origin": "registry",
|
||||
"review_state": "candidate",
|
||||
"status": "active",
|
||||
"confidence": 0.9,
|
||||
"replacement_scope": scope["id"],
|
||||
"provenance": [provenance],
|
||||
"source_anchors": [anchor],
|
||||
}
|
||||
attributes = [
|
||||
_attribute(repo_key, name, value, scope["id"], provenance, anchor)
|
||||
for name, value in node["attributes"].items()
|
||||
if value not in ("", [], None)
|
||||
]
|
||||
return _connector_result(
|
||||
connector_id=self.connector_id,
|
||||
connector_type=self.connector_type,
|
||||
status="success",
|
||||
source=source,
|
||||
message=f"matched repo {context.repo_slug}",
|
||||
started_at=started_at,
|
||||
replacement_scopes=[scope],
|
||||
candidates={
|
||||
"nodes": [node],
|
||||
"edges": [edge],
|
||||
"attributes": attributes,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def apply_connectors(
|
||||
snapshot: dict[str, Any],
|
||||
*,
|
||||
repo_path: Path,
|
||||
configs: list[ConnectorConfig],
|
||||
) -> dict[str, Any]:
|
||||
result = _copy_json(snapshot)
|
||||
context = ConnectorContext(
|
||||
repo_path=repo_path,
|
||||
repo_slug=str((snapshot.get("source") or {}).get("repo_slug") or repo_path.name),
|
||||
snapshot=result,
|
||||
)
|
||||
for config in configs:
|
||||
if not config.enabled:
|
||||
_merge_connector_result(
|
||||
result,
|
||||
_connector_result(
|
||||
connector_id=config.connector_id,
|
||||
connector_type=config.connector_type,
|
||||
status="skipped",
|
||||
source=config.source_path or "",
|
||||
message="connector disabled",
|
||||
started_at=_utc_now(),
|
||||
),
|
||||
)
|
||||
continue
|
||||
try:
|
||||
connector = create_connector(config)
|
||||
connector_result = connector.collect(context)
|
||||
except Exception as exc:
|
||||
connector_result = _connector_result(
|
||||
connector_id=config.connector_id,
|
||||
connector_type=config.connector_type,
|
||||
status="failed",
|
||||
source=config.source_path or "",
|
||||
message=str(exc),
|
||||
started_at=_utc_now(),
|
||||
review_artifacts=[
|
||||
_review_artifact(
|
||||
artifact_type="connector_failed",
|
||||
origin="registry",
|
||||
message=f"Connector {config.connector_id} failed: {exc}",
|
||||
payload={"connector_type": config.connector_type, "source": config.source_path or ""},
|
||||
)
|
||||
],
|
||||
)
|
||||
_merge_connector_result(result, connector_result)
|
||||
return result
|
||||
|
||||
|
||||
def create_connector(config: ConnectorConfig) -> DiscoveryConnector:
|
||||
if config.connector_id == LocalFabricRegistryConnector.connector_id:
|
||||
source = Path(config.source_path or "registry/local-repos.yaml")
|
||||
return LocalFabricRegistryConnector(source)
|
||||
raise ValueError(f"unknown connector: {config.connector_id}")
|
||||
|
||||
|
||||
def _connector_result(
|
||||
*,
|
||||
connector_id: str,
|
||||
connector_type: str,
|
||||
status: str,
|
||||
source: str,
|
||||
message: str,
|
||||
started_at: str,
|
||||
replacement_scopes: list[dict[str, object]] | None = None,
|
||||
candidates: dict[str, list[dict[str, object]]] | None = None,
|
||||
review_artifacts: list[dict[str, object]] | None = None,
|
||||
) -> ConnectorResult:
|
||||
candidates = candidates or {"nodes": [], "edges": [], "attributes": []}
|
||||
return ConnectorResult(
|
||||
connector_run={
|
||||
"connector_id": connector_id,
|
||||
"connector_type": connector_type,
|
||||
"status": status,
|
||||
"source": source,
|
||||
"message": message,
|
||||
"candidate_counts": {
|
||||
"nodes": len(candidates.get("nodes", [])),
|
||||
"edges": len(candidates.get("edges", [])),
|
||||
"attributes": len(candidates.get("attributes", [])),
|
||||
},
|
||||
"started_at": started_at,
|
||||
"completed_at": _utc_now(),
|
||||
},
|
||||
replacement_scopes=replacement_scopes or [],
|
||||
candidates=candidates,
|
||||
review_artifacts=review_artifacts or [],
|
||||
)
|
||||
|
||||
|
||||
def _merge_connector_result(snapshot: dict[str, Any], result: ConnectorResult) -> None:
|
||||
scopes = {
|
||||
str(scope.get("id")): scope
|
||||
for scope in snapshot.setdefault("replacement_scopes", [])
|
||||
if isinstance(scope, dict)
|
||||
}
|
||||
for scope in result.replacement_scopes:
|
||||
scopes[str(scope["id"])] = scope
|
||||
snapshot["replacement_scopes"] = [scopes[key] for key in sorted(scopes)]
|
||||
|
||||
candidates = snapshot.setdefault("candidates", {"nodes": [], "edges": [], "attributes": []})
|
||||
for collection in ("nodes", "edges", "attributes"):
|
||||
current = {
|
||||
str(item.get("stable_key")): item
|
||||
for item in candidates.setdefault(collection, [])
|
||||
if isinstance(item, dict) and item.get("stable_key")
|
||||
}
|
||||
for incoming in result.candidates.get(collection, []):
|
||||
key = str(incoming.get("stable_key"))
|
||||
current[key] = _merge_candidate(current.get(key), incoming)
|
||||
candidates[collection] = [current[key] for key in sorted(current)]
|
||||
|
||||
if result.review_artifacts:
|
||||
snapshot.setdefault("review_artifacts", []).extend(result.review_artifacts)
|
||||
snapshot.setdefault("connector_runs", []).append(result.connector_run)
|
||||
|
||||
|
||||
def _merge_candidate(existing: dict[str, object] | None, incoming: dict[str, object]) -> dict[str, object]:
|
||||
if existing is None:
|
||||
return incoming
|
||||
merged = {**existing}
|
||||
for field in ("aliases", "provenance", "source_anchors"):
|
||||
values = [*list(existing.get(field, [])), *list(incoming.get(field, []))]
|
||||
if values:
|
||||
merged[field] = _unique_json(values) if field != "aliases" else _unique_strings(values)
|
||||
if isinstance(existing.get("attributes"), dict) or isinstance(incoming.get("attributes"), dict):
|
||||
merged["attributes"] = {
|
||||
**(existing.get("attributes") if isinstance(existing.get("attributes"), dict) else {}),
|
||||
**(incoming.get("attributes") if isinstance(incoming.get("attributes"), dict) else {}),
|
||||
}
|
||||
if isinstance(existing.get("confidence"), (int, float)) and isinstance(incoming.get("confidence"), (int, float)):
|
||||
merged["confidence"] = max(float(existing["confidence"]), float(incoming["confidence"]))
|
||||
return merged
|
||||
|
||||
|
||||
def _manifest_repo(repositories: list[object], repo_slug: str) -> tuple[int, dict[str, object] | None]:
|
||||
for index, item in enumerate(repositories):
|
||||
if isinstance(item, dict) and item.get("slug") == repo_slug:
|
||||
return index, item
|
||||
return -1, None
|
||||
|
||||
|
||||
def _repository_key(snapshot: dict[str, Any], repo_slug: str) -> str:
|
||||
candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {}
|
||||
for node in candidates.get("nodes", []):
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
if node.get("kind") == "Repository":
|
||||
return str(node.get("stable_key"))
|
||||
return discovery_stable_key(repo_slug, "Repository", repo_slug)
|
||||
|
||||
|
||||
def _attribute(
|
||||
entity_key: str,
|
||||
name: str,
|
||||
value: object,
|
||||
replacement_scope: str,
|
||||
provenance: dict[str, object],
|
||||
source_anchor: dict[str, object],
|
||||
) -> dict[str, object]:
|
||||
return {
|
||||
"stable_key": attribute_stable_key(entity_key, name),
|
||||
"entity_key": entity_key,
|
||||
"name": name,
|
||||
"value": _json_value(value),
|
||||
"origin": "registry",
|
||||
"review_state": "candidate",
|
||||
"confidence": 0.9,
|
||||
"replacement_scope": replacement_scope,
|
||||
"provenance": [provenance],
|
||||
"source_anchors": [source_anchor],
|
||||
}
|
||||
|
||||
|
||||
def _source_anchor(source_kind: str, path: str, *, json_pointer: str | None = None) -> dict[str, object]:
|
||||
anchor: dict[str, object] = {"source_kind": source_kind, "path": path}
|
||||
if json_pointer:
|
||||
anchor["json_pointer"] = json_pointer
|
||||
anchor["fingerprint"] = source_fingerprint(anchor)
|
||||
return anchor
|
||||
|
||||
|
||||
def _review_artifact(
|
||||
*,
|
||||
artifact_type: str,
|
||||
origin: str,
|
||||
message: str,
|
||||
payload: dict[str, object],
|
||||
) -> dict[str, object]:
|
||||
body = {
|
||||
"artifact_type": artifact_type,
|
||||
"origin": origin,
|
||||
"message": message,
|
||||
"payload": payload,
|
||||
"created_at": _utc_now(),
|
||||
}
|
||||
return {"id": f"review:{short_fingerprint(body, length=20)}", **body}
|
||||
|
||||
|
||||
def _display_path(path: Path, repo_path: Path) -> str:
|
||||
try:
|
||||
return path.resolve().relative_to(repo_path.resolve()).as_posix()
|
||||
except ValueError:
|
||||
return str(path)
|
||||
|
||||
|
||||
def _json_value(value: object) -> object:
|
||||
if value is None or isinstance(value, (str, int, float, bool)):
|
||||
return value
|
||||
if isinstance(value, list):
|
||||
return [_json_value(item) for item in value]
|
||||
if isinstance(value, dict):
|
||||
return {str(key): _json_value(item) for key, item in value.items()}
|
||||
return str(value)
|
||||
|
||||
|
||||
def _unique_strings(values: list[object]) -> list[str]:
|
||||
seen: set[str] = set()
|
||||
result: list[str] = []
|
||||
for value in values:
|
||||
text = str(value or "").strip()
|
||||
if not text or text in seen:
|
||||
continue
|
||||
seen.add(text)
|
||||
result.append(text)
|
||||
return result
|
||||
|
||||
|
||||
def _unique_json(values: list[object]) -> list[object]:
|
||||
seen: set[str] = set()
|
||||
result: list[object] = []
|
||||
for value in values:
|
||||
key = json.dumps(value, sort_keys=True, default=str)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
result.append(value)
|
||||
return result
|
||||
|
||||
|
||||
def _copy_json(value: Any) -> Any:
|
||||
return json.loads(json.dumps(value, default=str))
|
||||
|
||||
|
||||
def _utc_now() -> str:
|
||||
return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
|
||||
@@ -4,13 +4,14 @@ import json
|
||||
import re
|
||||
import subprocess
|
||||
import tomllib
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
import yaml
|
||||
|
||||
from .connectors import ConnectorConfig, apply_connectors
|
||||
from .discovery import (
|
||||
attribute_stable_key,
|
||||
discovery_stable_key,
|
||||
@@ -89,6 +90,7 @@ class ScanOptions:
|
||||
llm_enabled: bool = False
|
||||
llm_config: LLMExtractionConfig | None = None
|
||||
llm_adapter: object | None = None
|
||||
connectors: list[ConnectorConfig] = field(default_factory=list)
|
||||
|
||||
|
||||
class CandidateAccumulator:
|
||||
@@ -319,6 +321,12 @@ def scan_repo(options: ScanOptions | Path, **overrides: object) -> dict[str, obj
|
||||
"retirement_policy": "missing candidates retire only inside their replacement scope",
|
||||
},
|
||||
}
|
||||
if options.connectors:
|
||||
snapshot = apply_connectors(
|
||||
snapshot,
|
||||
repo_path=repo_path,
|
||||
configs=options.connectors,
|
||||
)
|
||||
if options.llm_enabled:
|
||||
return augment_snapshot_with_llm(
|
||||
snapshot,
|
||||
|
||||
@@ -101,6 +101,10 @@ properties:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/$defs/reviewArtifact"
|
||||
connector_runs:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/$defs/connectorRun"
|
||||
reconciliation:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
@@ -538,6 +542,9 @@ $defs:
|
||||
- llm_low_confidence
|
||||
- llm_candidate_unresolved
|
||||
- llm_execution_error
|
||||
- connector_failed
|
||||
- connector_rate_limited
|
||||
- connector_unavailable
|
||||
origin:
|
||||
$ref: "#/$defs/origin"
|
||||
message:
|
||||
@@ -554,3 +561,56 @@ $defs:
|
||||
created_at:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
connectorRun:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- connector_id
|
||||
- connector_type
|
||||
- status
|
||||
properties:
|
||||
connector_id:
|
||||
type: string
|
||||
minLength: 1
|
||||
connector_type:
|
||||
type: string
|
||||
enum:
|
||||
- package_registry
|
||||
- container_registry
|
||||
- api_catalog
|
||||
- service_catalog
|
||||
- deployment_inventory
|
||||
- fabric_registry
|
||||
status:
|
||||
type: string
|
||||
enum:
|
||||
- success
|
||||
- partial
|
||||
- unavailable
|
||||
- rate_limited
|
||||
- failed
|
||||
- skipped
|
||||
source:
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
candidate_counts:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
nodes:
|
||||
type: integer
|
||||
minimum: 0
|
||||
edges:
|
||||
type: integer
|
||||
minimum: 0
|
||||
attributes:
|
||||
type: integer
|
||||
minimum: 0
|
||||
started_at:
|
||||
type: string
|
||||
format: date-time
|
||||
completed_at:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
149
tests/test_connectors.py
Normal file
149
tests/test_connectors.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from railiance_fabric.cli import main as cli_main
|
||||
from railiance_fabric.connectors import CONNECTOR_TYPES, ConnectorConfig
|
||||
from railiance_fabric.scanner import ScanOptions, scan_repo
|
||||
from railiance_fabric.schema_validation import draft202012_validator
|
||||
|
||||
|
||||
def test_connector_interfaces_cover_followup_catalog_types() -> None:
|
||||
assert set(CONNECTOR_TYPES) >= {
|
||||
"package_registry",
|
||||
"container_registry",
|
||||
"api_catalog",
|
||||
"service_catalog",
|
||||
"deployment_inventory",
|
||||
"fabric_registry",
|
||||
}
|
||||
|
||||
|
||||
def test_local_fabric_registry_connector_adds_separate_registry_evidence(tmp_path: Path) -> None:
|
||||
repo = _minimal_repo(tmp_path)
|
||||
manifest = _manifest(tmp_path)
|
||||
|
||||
snapshot = scan_repo(
|
||||
ScanOptions(
|
||||
repo_path=repo,
|
||||
repo_slug="fixture-repo",
|
||||
repo_name="Fixture Repo",
|
||||
commit="abc123",
|
||||
connectors=[
|
||||
ConnectorConfig(
|
||||
connector_id="local-fabric-registry",
|
||||
connector_type="fabric_registry",
|
||||
source_path=str(manifest),
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
_validate_schema("discovery-snapshot.schema.yaml", snapshot)
|
||||
assert snapshot["connector_runs"][0]["status"] == "success"
|
||||
assert snapshot["connector_runs"][0]["candidate_counts"] == {"nodes": 1, "edges": 1, "attributes": 8}
|
||||
assert any(scope["source_kind"] == "fabric_registry" for scope in snapshot["replacement_scopes"])
|
||||
|
||||
registry_node = next(node for node in snapshot["candidates"]["nodes"] if node["kind"] == "FabricRegistryEntry")
|
||||
assert registry_node["origin"] == "registry"
|
||||
assert registry_node["review_state"] == "candidate"
|
||||
assert registry_node["replacement_scope"].startswith("scope:fixture-repo:local-fabric-registry:fabric_registry")
|
||||
assert registry_node["attributes"]["state_hub_repo_id"] == "state-hub-id"
|
||||
assert any(edge["edge_type"] == "cataloged_as" for edge in snapshot["candidates"]["edges"])
|
||||
registry_attributes = {
|
||||
attribute["name"]: attribute
|
||||
for attribute in snapshot["candidates"]["attributes"]
|
||||
if attribute["origin"] == "registry"
|
||||
}
|
||||
assert registry_attributes["registry_remote_url"]["value"] == "gitea-remote:coulomb/fixture-repo.git"
|
||||
assert registry_attributes["registry_default_branch"]["source_anchors"][0]["source_kind"] == "fabric_registry"
|
||||
|
||||
|
||||
def test_connector_failure_becomes_review_artifact_without_corrupting_scan(tmp_path: Path) -> None:
|
||||
repo = _minimal_repo(tmp_path)
|
||||
snapshot = scan_repo(
|
||||
ScanOptions(
|
||||
repo_path=repo,
|
||||
repo_slug="fixture-repo",
|
||||
repo_name="Fixture Repo",
|
||||
commit="abc123",
|
||||
connectors=[
|
||||
ConnectorConfig(
|
||||
connector_id="local-fabric-registry",
|
||||
connector_type="fabric_registry",
|
||||
source_path=str(tmp_path / "missing.yaml"),
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
_validate_schema("discovery-snapshot.schema.yaml", snapshot)
|
||||
assert snapshot["connector_runs"][0]["status"] == "unavailable"
|
||||
assert snapshot["review_artifacts"][0]["artifact_type"] == "connector_unavailable"
|
||||
assert any(node["kind"] == "Repository" for node in snapshot["candidates"]["nodes"])
|
||||
|
||||
|
||||
def test_scan_cli_can_enable_local_fabric_registry_connector(tmp_path: Path, capsys) -> None:
|
||||
repo = _minimal_repo(tmp_path)
|
||||
manifest = _manifest(tmp_path)
|
||||
output = tmp_path / "snapshot.json"
|
||||
|
||||
assert cli_main(
|
||||
[
|
||||
"scan",
|
||||
str(repo),
|
||||
"--repo-slug",
|
||||
"fixture-repo",
|
||||
"--repo-name",
|
||||
"Fixture Repo",
|
||||
"--commit",
|
||||
"abc123",
|
||||
"--connector",
|
||||
"local-fabric-registry",
|
||||
"--connector-manifest",
|
||||
str(manifest),
|
||||
"--output",
|
||||
str(output),
|
||||
]
|
||||
) == 0
|
||||
|
||||
summary = capsys.readouterr().out
|
||||
assert "1 connector run(s)" in summary
|
||||
payload = json.loads(output.read_text(encoding="utf-8"))
|
||||
_validate_schema("discovery-snapshot.schema.yaml", payload)
|
||||
assert payload["connector_runs"][0]["status"] == "success"
|
||||
|
||||
|
||||
def _minimal_repo(tmp_path: Path) -> Path:
|
||||
repo = tmp_path / "fixture-repo"
|
||||
repo.mkdir()
|
||||
(repo / "README.md").write_text("# Fixture Repo\n", encoding="utf-8")
|
||||
return repo
|
||||
|
||||
|
||||
def _manifest(tmp_path: Path) -> Path:
|
||||
manifest = tmp_path / "local-repos.yaml"
|
||||
manifest.write_text(
|
||||
"""
|
||||
apiVersion: railiance.fabric/v1alpha1
|
||||
kind: RegistryOnboardingManifest
|
||||
repositories:
|
||||
- slug: fixture-repo
|
||||
name: Fixture Repo
|
||||
domain: testing
|
||||
path: /tmp/fixture-repo
|
||||
default_branch: main
|
||||
state_hub_repo_id: state-hub-id
|
||||
remote_url: gitea-remote:coulomb/fixture-repo.git
|
||||
declaration_paths:
|
||||
- /tmp/fixture-repo
|
||||
""".lstrip(),
|
||||
encoding="utf-8",
|
||||
)
|
||||
return manifest
|
||||
|
||||
|
||||
def _validate_schema(schema_name: str, payload: dict[str, object]) -> None:
|
||||
validator = draft202012_validator(Path("schemas") / schema_name)
|
||||
validator.validate(payload)
|
||||
@@ -223,7 +223,7 @@ Acceptance notes:
|
||||
|
||||
```task
|
||||
id: RAIL-FAB-WP-0010-T05
|
||||
status: todo
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "d664301d-c531-4cf8-a1dd-cbadda0e0fdb"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user