feat(connectors): complete ATLAS-WP-0003 — discovery connectors (Phase 2)
Some checks failed
validate-registry / validate (push) Has been cancelled

T01 connector_base + docs/discovery-connectors.md (read-only/stateless,
candidate->PR->promote; `candidate` added to schema status enum; candidates/
gitignored, excluded from gate).
T02 connector_reposcoping (repo-scoping facts -> candidates; graceful degrade).
T03 connector_gitconfig (deterministic scan; real .env -> secret-ref, no values;
verified 4 real candidates from ~/state-hub).
T04 connector_featurecontrol (feature-flag surfaces linking to feature-control
keys, no eval logic; FR-12).
T05 registry_health (unowned + stale detection).
Make targets: connect-gitconfig/reposcoping/featurecontrol, registry-health.

WP-0003 finished (5/5).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-27 00:27:57 +02:00
parent d1a9da926e
commit bc702db4cf
10 changed files with 571 additions and 7 deletions

91
tools/connector_base.py Normal file
View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Shared base for read-only discovery connectors (ATLAS-WP-0003).
A connector scans a source and emits *candidate* surface entries for human/agent
PR review. Connectors are stateless and read-only: they NEVER write a source
system, NEVER auto-merge, and NEVER read or store configuration values or secret
values (PRD FR-8; docs/discovery-connectors.md).
Candidates are written to registry/surfaces/candidates/<id>.md with
`status: candidate` and provenance in `evidence`. A candidate is never written if a
promoted entry with the same id already exists (the registry is the source of
truth; connectors propose, they do not overwrite).
"""
from __future__ import annotations
import datetime as _dt
import json
from pathlib import Path
try:
import yaml
from jsonschema import Draft202012Validator
except ImportError as exc: # pragma: no cover
raise SystemExit(f"setup error: missing dependency ({exc}). pip install pyyaml jsonschema")
ROOT = Path(__file__).resolve().parent.parent
SCHEMA_PATH = ROOT / "schemas" / "surface-entry.schema.json"
SURFACES_DIR = ROOT / "registry" / "surfaces"
CANDIDATES_DIR = SURFACES_DIR / "candidates"
_VALIDATOR = Draft202012Validator(json.loads(SCHEMA_PATH.read_text()))
TODAY = _dt.date.today().isoformat()
def promoted_ids() -> set[str]:
    """Return the ids of surfaces that are already promoted.

    An id is the file stem of each ``*.md`` file located directly in
    ``SURFACES_DIR``. The glob is not recursive, so files below the
    ``candidates/`` subdirectory are not part of the result.
    """
    stems: set[str] = set()
    for entry_file in SURFACES_DIR.glob("*.md"):
        stems.add(entry_file.stem)
    return stems
def validate_entry(entry: dict) -> list[str]:
    """Validate *entry* with the module-level schema validator.

    Returns one ``"<path>: <message>"`` string per validation error, where
    ``<path>`` is the slash-joined location of the offending value and
    ``(root)`` stands in for an empty path. An empty list means no errors
    were reported.
    """
    problems: list[str] = []
    for err in _VALIDATOR.iter_errors(entry):
        location = "/".join(str(part) for part in err.path)
        if not location:
            location = "(root)"
        problems.append(f"{location}: {err.message}")
    return problems
def emit_candidate(entry: dict, *, connector: str, body: str = "") -> tuple[str, Path | None]:
    """Validate one candidate entry and write it to the candidates directory.

    The caller's *entry* is not mutated: a shallow copy is stamped with
    ``status: candidate`` and receives default provenance
    (``evidence.discovery_method`` and ``evidence.last_seen``) unless the
    entry already carries those keys.

    Args:
        entry: Surface-entry mapping; it is dumped as YAML front matter and
            its ``id`` becomes the candidate file name.
        connector: Connector name, used for provenance and the default body.
        body: Text placed below the heading; a generic review prompt is
            used when empty.

    Returns:
        ``(status_message, path)``. The message starts with ``written:``,
        ``skipped (promoted):`` or ``invalid:`` followed by the id (and, for
        invalid entries, the first schema error). ``path`` is the written
        file, or ``None`` when nothing was written.
    """
    candidate = dict(entry)
    candidate["status"] = "candidate"
    evidence = dict(candidate.get("evidence") or {})
    evidence.setdefault("discovery_method", f"connector:{connector}")
    evidence.setdefault("last_seen", TODAY)
    candidate["evidence"] = evidence

    sid = candidate.get("id", "<no-id>")
    # Connectors propose, they never overwrite: skip ids already promoted.
    if sid in promoted_ids():
        return (f"skipped (promoted): {sid}", None)
    errs = validate_entry(candidate)
    if errs:
        return (f"invalid: {sid}: {errs[0]}", None)

    CANDIDATES_DIR.mkdir(parents=True, exist_ok=True)
    fm = yaml.safe_dump(candidate, sort_keys=False).strip()
    text = f"---\n{fm}\n---\n\n# {candidate.get('name', sid)} (candidate)\n\n"
    text += body or (
        f"Discovered by `{connector}`. Review, refine, and promote to "
        f"`registry/surfaces/{sid}.md` + `surfaces.yaml`, or reject.\n"
    )
    path = CANDIDATES_DIR / f"{sid}.md"
    # Explicit UTF-8: the heading and body may contain non-ASCII text, and
    # the locale default encoding differs between machines.
    path.write_text(text, encoding="utf-8")
    return (f"written: {sid}", path)
def run_connector(name: str, candidates: list[tuple[dict, str]]) -> int:
    """Emit a batch of candidates and print one line per item plus a summary.

    ``candidates`` is a list of ``(entry, body)`` pairs. Returns 1 when at
    least one entry was reported invalid, otherwise 0 (an empty batch also
    returns 0 after printing a notice).
    """
    if not candidates:
        print(f"{name}: no candidates discovered (source empty or unavailable)")
        return 0

    # Outcome messages start with exactly one of these prefixes.
    tally = {"written": 0, "skipped": 0, "invalid": 0}
    for entry, body in candidates:
        outcome = emit_candidate(entry, connector=name, body=body)[0]
        print(f" {outcome}")
        for prefix in tally:
            if outcome.startswith(prefix):
                tally[prefix] += 1

    print(f"{name}: {tally['written']} written, {tally['skipped']} skipped, "
          f"{tally['invalid']} invalid "
          f"-> registry/surfaces/candidates/")
    if tally["invalid"]:
        return 1
    return 0

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""feature-control flag connector (ATLAS-WP-0003-T04).
Inventory feature-control keys and emit `feature-flag` surfaces that LINK to the
authoritative feature-control key (`sources[].role: feature-control-key`) and
contain no evaluation logic (PRD FR-12 delegation boundary). Read-only.
Source resolution (first available):
1. --keys <file> : newline- or yaml-list of feature keys
2. ~/feature-control/registry/indexes/feature-keys.yaml (if present)
Degrades gracefully when feature-control has no key registry yet (planning phase).
Usage:
python3 tools/connector_featurecontrol.py [--keys keys.yaml]
make connect-featurecontrol
"""
from __future__ import annotations
import sys
from pathlib import Path
try:
import yaml
except ImportError as exc: # pragma: no cover
raise SystemExit(f"setup error: missing PyYAML ({exc})")
from connector_base import run_connector
FC_KEYS = Path.home() / "feature-control" / "registry" / "indexes" / "feature-keys.yaml"
def _load_keys(keys_file: str | None) -> list[str]:
src = Path(keys_file) if keys_file else FC_KEYS
if not src.exists():
print(f"feature-control: no key registry at {src} (planning phase — none yet)")
return []
raw = src.read_text()
try:
data = yaml.safe_load(raw)
except yaml.YAMLError:
data = None
if isinstance(data, dict):
keys = data.get("keys") or data.get("feature_keys") or []
elif isinstance(data, list):
keys = data
else:
keys = [ln.strip() for ln in raw.splitlines() if ln.strip() and not ln.startswith("#")]
return [str(k) for k in keys]
def keys_to_candidates(keys: list[str]) -> list[tuple[dict, str]]:
    """Turn feature-control keys into ``(entry, body)`` candidate pairs.

    Each key yields one ``feature-flag`` entry whose id is
    ``surface.infotech.feature-control.<slug>``; the slug is the key with
    ``.`` and ``_`` replaced by ``-`` and lower-cased. The entry only links
    to the key through ``sources`` and carries no evaluation logic.
    """

    def candidate_for(flag_key: str) -> tuple[dict, str]:
        # Build the front-matter mapping and review note for a single key.
        dashed = flag_key.replace(".", "-").replace("_", "-")
        front_matter = {
            "id": f"surface.infotech.feature-control.{dashed.lower()}",
            "name": f"feature flag: {flag_key}",
            "kind": "feature-flag",
            "summary": f"Runtime feature availability controlled by feature-control key `{flag_key}`.",
            "owner": "feature-control",
            "scope": {
                "allowed_layers": ["company", "environment", "tenant", "user"],
                "default_layer": "company",
            },
            "mutability": "hot-reloadable",
            "security_class": "operational",
            "sources": [
                {
                    "repo": "feature-control",
                    "endpoint": f"openfeature:{flag_key}",
                    "role": "feature-control-key",
                },
            ],
            "relations": {"related_to": []},
        }
        note = (f"Links to feature-control key `{flag_key}`. config-atlas maps "
                f"the flag; feature-control owns evaluation. Promote or reject.\n")
        return (front_matter, note)

    return [candidate_for(k) for k in keys]
def main(argv: list[str]) -> int:
    """Run the feature-control connector and return its exit code.

    ``--keys <file>`` selects the key file. When the flag is absent, or is
    the last argument with no value after it, ``None`` is handed to
    ``_load_keys``.
    """
    keys_file = None
    try:
        flag_at = argv.index("--keys")
    except ValueError:
        flag_at = None
    if flag_at is not None and flag_at + 1 < len(argv):
        keys_file = argv[flag_at + 1]
    found = keys_to_candidates(_load_keys(keys_file))
    return run_connector("feature-control", found)
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).resolve().parent))
raise SystemExit(main(sys.argv[1:]))

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""git-config deterministic scanner (ATLAS-WP-0003-T03).
Scan a repository for configuration files and emit candidate surface entries.
Records file *locations* and infers kind/scope; it NEVER reads or stores config
values, and NEVER reads real secret files (only committed *.example / values /
config files). Real `.env` is treated as a secret-bearing source -> a secret-ref
candidate with no value.
Usage:
python3 tools/connector_gitconfig.py <repo-slug> [repo-path]
make connect-gitconfig REPO=state-hub
"""
from __future__ import annotations
import sys
from pathlib import Path
from connector_base import run_connector
# (glob, kind, role) — order matters; first match wins per surface id.
PATTERNS = [
    ("**/values*.yaml", "deploy-config", "installation-overlay"),
    ("**/values*.yml", "deploy-config", "installation-overlay"),
    ("**/*.env.example", "app-config", "company-baseline"),
    ("**/config*.yaml", "app-config", "company-baseline"),
    ("**/config*.yml", "app-config", "company-baseline"),
    ("**/settings*.yaml", "app-config", "company-baseline"),
]
# Directory names that are never descended into (relative to the repo root).
SKIP_DIRS = {".git", "node_modules", ".venv", "venv", "__pycache__", "dist", "build"}


def _slugify(rel: str) -> str:
    """Turn a relative path into a lower-case, dash-separated id fragment.

    ``/``, ``.`` and ``_`` become ``-``; empty segments are dropped, so
    leading, trailing and repeated dashes never appear in the result.
    """
    out = rel.replace("/", "-").replace(".", "-").replace("_", "-").lower()
    return "-".join(filter(None, out.split("-")))


def scan(repo_slug: str, repo_path: Path) -> list[tuple[dict, str]]:
    """Scan *repo_path* for configuration files and build candidate entries.

    Only file locations are recorded; no file content is read. Files are
    matched against ``PATTERNS`` in order, then every ``.env`` file is added
    as a ``secret-ref`` candidate. Each surface id is emitted at most once
    (the first match wins), and matches are visited in sorted order so the
    output is reproducible.

    Args:
        repo_slug: Repository slug used as owner and in the surface id.
        repo_path: Root directory of the repository to scan.

    Returns:
        A list of ``(entry, body)`` pairs in discovery order.
    """
    out: list[tuple[dict, str]] = []
    seen: set[str] = set()

    def add(rel: str, kind: str, role: str, secref: bool = False) -> None:
        sid = f"surface.infotech.{repo_slug}.{_slugify(rel)}"
        if sid in seen:
            return
        # Track the id (not the path): the id is what the check above tests
        # and what names the candidate file, so two paths that slugify to
        # the same id must not both be emitted.
        seen.add(sid)
        entry = {
            "id": sid,
            "name": f"{repo_slug}: {rel}",
            "kind": "secret-ref" if secref else kind,
            "summary": f"Configuration surface discovered at {rel} in {repo_slug}.",
            "owner": repo_slug,
            "scope": {"allowed_layers": ["company", "environment", "installation"],
                      "default_layer": "company"},
            "mutability": "deploy-time",
            "security_class": "secret-ref" if secref else "operational",
            "sources": [{"repo": repo_slug, "path": rel, "role": role}],
        }
        body = (f"Discovered by `git-config` scanning `{repo_slug}`. Source: `{rel}`.\n"
                f"No values were read. Review kind/scope/owner and promote or reject.\n")
        out.append((entry, body))

    def matches(glob: str):
        # Yield repo-relative POSIX paths of regular files matching *glob*.
        for f in sorted(repo_path.glob(glob)):
            rel_path = f.relative_to(repo_path)
            # Test only the parts below the repo root: a checkout that lives
            # under e.g. ~/build/ must not have every file skipped.
            if f.is_file() and not any(part in SKIP_DIRS for part in rel_path.parts):
                yield rel_path.as_posix()

    for glob, kind, role in PATTERNS:
        for rel in matches(glob):
            add(rel, kind, role)
    # Real .env => secret-bearing source; record as secret-ref, never read it.
    for rel in matches("**/.env"):
        add(rel, "secret-ref", "company-baseline", secref=True)
    return out
def main(argv: list[str]) -> int:
    """Run the git-config scanner from command-line arguments.

    ``argv[0]`` is the repo slug and the optional ``argv[1]`` the repo path
    (defaulting to ``~/<slug>``). Returns 2 after printing the module
    docstring when no arguments are given, 1 when the path is not a
    directory, otherwise the exit code of ``run_connector``.
    """
    if not argv:
        print(__doc__)
        return 2
    slug, *rest = argv
    path = Path(rest[0]) if rest else Path.home() / slug
    if path.is_dir():
        return run_connector("git-config", scan(slug, path))
    print(f"error: repo path not found: {path}", file=sys.stderr)
    return 1
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).resolve().parent))
raise SystemExit(main(sys.argv[1:]))

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""repo-scoping fact ingestion connector (ATLAS-WP-0003-T02).
Consume repo-scoping observed facts/evidence as connector input and emit candidate
configuration surfaces, adding only config-kind/layer classification on top
(ecosystem-boundaries §2.4 option a). Read-only: zero writes to repo-scoping or the
scanned repo.
Source resolution (first available):
1. --facts <file.json> : a repo-scoping facts export (list of fact objects)
2. REPO_SCOPING_URL env : GET {url}/repos/{slug}/facts
Degrades gracefully (emits nothing) when no source is available.
Usage:
python3 tools/connector_reposcoping.py <repo-slug> [--facts facts.json]
make connect-reposcoping REPO=state-hub
"""
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from connector_base import run_connector
CONFIG_HINTS = ("config", "env", "settings", "values", ".yaml", ".yml", ".toml", ".ini")
def _load_facts(slug: str, facts_file: str | None) -> list[dict]:
if facts_file:
p = Path(facts_file)
if p.exists():
data = json.loads(p.read_text())
return data if isinstance(data, list) else data.get("facts", [])
print(f"repo-scoping: facts file not found: {facts_file}", file=sys.stderr)
return []
url = os.environ.get("REPO_SCOPING_URL")
if url:
try:
import urllib.request
with urllib.request.urlopen(f"{url}/repos/{slug}/facts", timeout=5) as r:
data = json.loads(r.read())
return data if isinstance(data, list) else data.get("facts", [])
except Exception as exc: # noqa: BLE001
print(f"repo-scoping: API unavailable ({exc})", file=sys.stderr)
return []
print("repo-scoping: no --facts file and REPO_SCOPING_URL unset; nothing to ingest")
return []
def _is_config_fact(fact: dict) -> bool:
    """Tell whether *fact* mentions any of the ``CONFIG_HINTS`` substrings.

    The ``path``, ``kind`` and ``summary`` values are stringified, joined
    with single spaces and lower-cased before the substring search.
    """
    fields = (fact.get("path", ""), fact.get("kind", ""), fact.get("summary", ""))
    haystack = " ".join(str(value) for value in fields).lower()
    for hint in CONFIG_HINTS:
        if hint in haystack:
            return True
    return False
def facts_to_candidates(slug: str, facts: list[dict]) -> list[tuple[dict, str]]:
    """Convert config-related repo-scoping facts into ``(entry, body)`` pairs.

    Facts that ``_is_config_fact`` rejects, or that have no usable ``path``,
    are skipped. The surface id is ``surface.infotech.<slug>.<stem>`` where
    the stem is the path with ``/``, ``.`` and ``_`` turned into ``-``,
    lower-cased, and with empty segments removed (the same normalisation
    the git-config scanner's ``_slugify`` applies, so both connectors
    propose the same id for the same file). Each id is emitted once per
    batch; the first fact for an id wins.
    """
    out: list[tuple[dict, str]] = []
    seen: set[str] = set()
    for fact in facts:
        if not _is_config_fact(fact):
            continue
        # `or ""` keeps a null path from becoming the literal string "None".
        rel = str(fact.get("path") or "").strip("/")
        if not rel:
            continue
        dashed = rel.replace("/", "-").replace(".", "-").replace("_", "-").lower()
        # Drop empty segments so ".env" or "./x" do not yield leading dashes.
        stem = "-".join(filter(None, dashed.split("-")))
        if not stem:
            continue
        sid = f"surface.infotech.{slug}.{stem}"
        if sid in seen:
            # A second candidate with this id would overwrite the first file.
            continue
        seen.add(sid)
        entry = {
            "id": sid,
            "name": f"{slug}: {rel}",
            "kind": "app-config",
            "summary": fact.get("summary") or f"Config surface from repo-scoping fact at {rel}.",
            "owner": slug,
            "scope": {"allowed_layers": ["company", "environment", "installation"],
                      "default_layer": "company"},
            "mutability": "deploy-time",
            "security_class": "operational",
            "sources": [{"repo": slug, "path": rel, "role": "company-baseline"}],
            "evidence": {"discovery_method": "connector:repo-scoping",
                         "change_log_ref": str(fact.get("id", ""))},
        }
        out.append((entry, f"Ingested from repo-scoping fact `{fact.get('id','?')}` "
                           f"({rel}). Classify kind/scope and promote or reject.\n"))
    return out
def main(argv: list[str]) -> int:
    """Run the repo-scoping connector from command-line arguments.

    ``argv[0]`` is the repo slug; ``--facts <file>`` optionally names a
    facts export (ignored when no value follows the flag). Returns 2 after
    printing the module docstring when no arguments are given, otherwise
    the exit code of ``run_connector``.
    """
    if not argv:
        print(__doc__)
        return 2
    slug = argv[0]
    try:
        flag_at = argv.index("--facts")
    except ValueError:
        flag_at = None
    has_value = flag_at is not None and flag_at + 1 < len(argv)
    facts_file = argv[flag_at + 1] if has_value else None
    facts = _load_facts(slug, facts_file)
    return run_connector("repo-scoping", facts_to_candidates(slug, facts))
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).resolve().parent))
raise SystemExit(main(sys.argv[1:]))

84
tools/registry_health.py Normal file
View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""Registry health: stale & unowned surface detection (ATLAS-WP-0003-T05).
Reports promoted surfaces that need attention:
- unowned : missing owner, or owner not resolvable to a known identity
- stale : evidence.last_seen older than --max-age-days (default 180), or absent
Ownership resolution uses reuse-surface's local-repo-roster as a stand-in for
domain-tree identities (until domain-tree binding is wired, ATLAS-WP-0002 follow-up).
Usage:
python3 tools/registry_health.py [--max-age-days N] [--strict]
make registry-health
Exit 0 normally; 1 when --strict and any issue is found.
"""
from __future__ import annotations
import datetime as _dt
import sys
from pathlib import Path
try:
import yaml
except ImportError as exc: # pragma: no cover
raise SystemExit(f"setup error: missing PyYAML ({exc})")
from effective_config import SURFACES_DIR, load_entry
ROSTER = Path.home() / "reuse-surface" / "registry" / "federation" / "local-repo-roster.yaml"
def known_owners() -> set[str]:
    """Return the set of owner identities considered resolvable.

    Always contains ``custodian``. When the ``ROSTER`` file exists, the
    ``slug`` of every mapping under its top-level ``repos`` list is added.
    A roster that is empty, is not a mapping, has a null ``repos`` entry,
    or contains non-mapping items contributes nothing instead of raising.
    """
    owners = {"custodian"}  # State Hub domain identity not in the repo roster
    if not ROSTER.exists():
        return owners
    data = yaml.safe_load(ROSTER.read_text(encoding="utf-8"))
    repos = data.get("repos") if isinstance(data, dict) else None
    for repo in repos or []:
        if isinstance(repo, dict) and repo.get("slug"):
            owners.add(repo["slug"])
    return owners
def main(argv: list[str]) -> int:
    """Report unowned and stale promoted surfaces.

    Flags:
        --max-age-days N: staleness threshold in days (default 180).
        --strict: return 1 when any issue is found.

    Returns:
        0 normally; 1 with ``--strict`` when any surface is unowned or
        stale; 2 when ``--max-age-days`` has a missing, non-integer or
        out-of-range value.
    """
    max_age = 180
    strict = "--strict" in argv
    if "--max-age-days" in argv:
        i = argv.index("--max-age-days")
        try:
            max_age = int(argv[i + 1])
        except (IndexError, ValueError):
            print("error: --max-age-days requires an integer value", file=sys.stderr)
            return 2
    try:
        cutoff = _dt.date.today() - _dt.timedelta(days=max_age)
    except OverflowError:
        print(f"error: --max-age-days out of range: {max_age}", file=sys.stderr)
        return 2

    owners = known_owners()
    unowned: list[str] = []
    stale: list[str] = []
    # Glob once and reuse the list for both the scan and the total count.
    paths = sorted(SURFACES_DIR.glob("*.md"))
    for p in paths:
        e = load_entry(p)
        sid = e.get("id", p.stem)
        owner = e.get("owner")
        if not owner:
            unowned.append(f"{sid}: missing owner")
        elif owner not in owners:
            unowned.append(f"{sid}: owner '{owner}' not resolvable to a known identity")
        seen = (e.get("evidence", {}) or {}).get("last_seen")
        if not seen:
            stale.append(f"{sid}: no evidence.last_seen")
            continue
        try:
            # A YAML loader may already have turned the value into a date or
            # datetime object; only plain strings need parsing.
            if isinstance(seen, _dt.datetime):
                seen_date = seen.date()
            elif isinstance(seen, _dt.date):
                seen_date = seen
            else:
                seen_date = _dt.date.fromisoformat(str(seen))
        except ValueError:
            stale.append(f"{sid}: unparseable last_seen '{seen}'")
            continue
        if seen_date < cutoff:
            stale.append(f"{sid}: last_seen {seen} older than {max_age}d")

    print(f"registry health: {len(paths)} promoted surface(s)")
    print(f" unowned/unresolved: {len(unowned)}")
    for u in unowned:
        print(f" - {u}")
    print(f" stale (> {max_age}d): {len(stale)}")
    for s in stale:
        print(f" - {s}")
    if not unowned and not stale:
        print(" all surfaces owned and fresh.")
    return 1 if (strict and (unowned or stale)) else 0
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).resolve().parent))
raise SystemExit(main(sys.argv[1:]))