generated from coulomb/repo-seed
feat(connectors): complete ATLAS-WP-0003 — discovery connectors (Phase 2)
Some checks failed
validate-registry / validate (push) Has been cancelled
Some checks failed
validate-registry / validate (push) Has been cancelled
T01 connector_base + docs/discovery-connectors.md (read-only/stateless, candidate->PR->promote; `candidate` added to schema status enum; candidates/ gitignored, excluded from gate). T02 connector_reposcoping (repo-scoping facts -> candidates; graceful degrade). T03 connector_gitconfig (deterministic scan; real .env -> secret-ref, no values; verified 4 real candidates from ~/state-hub). T04 connector_featurecontrol (feature-flag surfaces linking to feature-control keys, no eval logic; FR-12). T05 registry_health (unowned + stale detection). Make targets: connect-gitconfig/reposcoping/featurecontrol, registry-health. WP-0003 finished (5/5). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
91
tools/connector_base.py
Normal file
91
tools/connector_base.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Shared base for read-only discovery connectors (ATLAS-WP-0003).
|
||||
|
||||
A connector scans a source and emits *candidate* surface entries for human/agent
|
||||
PR review. Connectors are stateless and read-only: they NEVER write a source
|
||||
system, NEVER auto-merge, and NEVER read or store configuration values or secret
|
||||
values (PRD FR-8; docs/discovery-connectors.md).
|
||||
|
||||
Candidates are written to registry/surfaces/candidates/<id>.md with
|
||||
`status: candidate` and provenance in `evidence`. A candidate is never written if a
|
||||
promoted entry with the same id already exists (the registry is the source of
|
||||
truth; connectors propose, they do not overwrite).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
from jsonschema import Draft202012Validator
|
||||
except ImportError as exc: # pragma: no cover
|
||||
raise SystemExit(f"setup error: missing dependency ({exc}). pip install pyyaml jsonschema")
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
SCHEMA_PATH = ROOT / "schemas" / "surface-entry.schema.json"
|
||||
SURFACES_DIR = ROOT / "registry" / "surfaces"
|
||||
CANDIDATES_DIR = SURFACES_DIR / "candidates"
|
||||
|
||||
_VALIDATOR = Draft202012Validator(json.loads(SCHEMA_PATH.read_text()))
|
||||
TODAY = _dt.date.today().isoformat()
|
||||
|
||||
|
||||
def promoted_ids() -> set[str]:
    """Return the ids of surface entries that are already promoted.

    An entry counts as promoted when a ``*.md`` file for it sits directly in
    ``SURFACES_DIR``; the file stem is used as the id. The glob is not
    recursive, so files in subdirectories are not included.
    """
    stems: set[str] = set()
    for md_file in SURFACES_DIR.glob("*.md"):
        stems.add(md_file.stem)
    return stems
|
||||
|
||||
|
||||
def validate_entry(entry: dict) -> list[str]:
    """Validate *entry* against the surface-entry schema.

    Returns one ``"<location>: <message>"`` string per schema violation. The
    location is the ``/``-joined path to the offending element, or ``(root)``
    when the error applies to the entry as a whole. An empty list means the
    entry is valid.
    """
    problems: list[str] = []
    for err in _VALIDATOR.iter_errors(entry):
        location = "/".join(str(part) for part in err.path)
        if not location:
            location = "(root)"
        problems.append(f"{location}: {err.message}")
    return problems
|
||||
|
||||
|
||||
def emit_candidate(entry: dict, *, connector: str, body: str = "") -> tuple[str, Path | None]:
    """Validate and write one candidate surface entry.

    The input mapping is copied, never mutated. The copy gets
    ``status: candidate`` and its ``evidence`` mapping is given default
    ``discovery_method`` (``connector:<connector>``) and ``last_seen``
    (``TODAY``) values when the caller did not supply them.

    Args:
        entry: Surface-entry fields proposed by a connector.
        connector: Connector name, recorded as provenance.
        body: Markdown placed under the heading; a generic review note is
            used when empty.

    Returns:
        ``(status_message, path)``. The message is ``"written: <id>"``,
        ``"skipped (promoted): <id>"`` or ``"invalid: <id>: <reason>"``.
        ``path`` is the written file, or ``None`` when nothing was written.
    """
    entry = dict(entry)
    entry["status"] = "candidate"
    ev = dict(entry.get("evidence", {}) or {})
    ev.setdefault("discovery_method", f"connector:{connector}")
    ev.setdefault("last_seen", TODAY)
    entry["evidence"] = ev

    sid = entry.get("id", "<no-id>")
    # The registry is the source of truth: never shadow a promoted entry.
    if sid in promoted_ids():
        return (f"skipped (promoted): {sid}", None)

    errs = validate_entry(entry)
    if errs:
        # Only the first violation is reported to keep the summary compact.
        return (f"invalid: {sid}: {errs[0]}", None)

    # The id becomes a file name; a path separator would escape the
    # candidates directory or fail on a missing parent directory.
    if any(sep in str(sid) for sep in ("/", "\\")):
        return (f"invalid: {sid}: id must not contain path separators", None)

    CANDIDATES_DIR.mkdir(parents=True, exist_ok=True)
    fm = yaml.safe_dump(entry, sort_keys=False).strip()
    text = f"---\n{fm}\n---\n\n# {entry.get('name', sid)} (candidate)\n\n"
    text += body or (
        f"Discovered by `{connector}`. Review, refine, and promote to "
        f"`registry/surfaces/{sid}.md` + `surfaces.yaml`, or reject.\n"
    )
    path = CANDIDATES_DIR / f"{sid}.md"
    # Explicit encoding keeps the output identical across platforms/locales.
    path.write_text(text, encoding="utf-8")
    return (f"written: {sid}", path)
|
||||
|
||||
|
||||
def run_connector(name: str, candidates: list[tuple[dict, str]]) -> int:
    """Emit every ``(entry, body)`` pair in *candidates* and print a summary.

    Each pair is passed to ``emit_candidate``; its status message is printed
    and tallied by prefix (written / skipped / invalid).

    Returns:
        ``1`` when at least one candidate was invalid, otherwise ``0``. An
        empty batch prints a notice and returns ``0``.
    """
    if not candidates:
        print(f"{name}: no candidates discovered (source empty or unavailable)")
        return 0

    tally = {"written": 0, "skipped": 0, "invalid": 0}
    for entry, body in candidates:
        message, _path = emit_candidate(entry, connector=name, body=body)
        print(f" {message}")
        for outcome in tally:
            if message.startswith(outcome):
                tally[outcome] += 1

    written = tally["written"]
    skipped = tally["skipped"]
    invalid = tally["invalid"]
    print(f"{name}: {written} written, {skipped} skipped, {invalid} invalid "
          f"-> registry/surfaces/candidates/")
    if invalid:
        return 1
    return 0
|
||||
85
tools/connector_featurecontrol.py
Normal file
85
tools/connector_featurecontrol.py
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
|
||||
"""feature-control flag connector (ATLAS-WP-0003-T04).
|
||||
|
||||
Inventory feature-control keys and emit `feature-flag` surfaces that LINK to the
|
||||
authoritative feature-control key (`sources[].role: feature-control-key`) and
|
||||
contain no evaluation logic (PRD FR-12 delegation boundary). Read-only.
|
||||
|
||||
Source resolution (first available):
|
||||
1. --keys <file> : newline- or yaml-list of feature keys
|
||||
2. ~/feature-control/registry/indexes/feature-keys.yaml (if present)
|
||||
Degrades gracefully when feature-control has no key registry yet (planning phase).
|
||||
|
||||
Usage:
|
||||
python3 tools/connector_featurecontrol.py [--keys keys.yaml]
|
||||
make connect-featurecontrol
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError as exc: # pragma: no cover
|
||||
raise SystemExit(f"setup error: missing PyYAML ({exc})")
|
||||
|
||||
from connector_base import run_connector
|
||||
|
||||
FC_KEYS = Path.home() / "feature-control" / "registry" / "indexes" / "feature-keys.yaml"
|
||||
|
||||
|
||||
def _load_keys(keys_file: str | None) -> list[str]:
|
||||
src = Path(keys_file) if keys_file else FC_KEYS
|
||||
if not src.exists():
|
||||
print(f"feature-control: no key registry at {src} (planning phase — none yet)")
|
||||
return []
|
||||
raw = src.read_text()
|
||||
try:
|
||||
data = yaml.safe_load(raw)
|
||||
except yaml.YAMLError:
|
||||
data = None
|
||||
if isinstance(data, dict):
|
||||
keys = data.get("keys") or data.get("feature_keys") or []
|
||||
elif isinstance(data, list):
|
||||
keys = data
|
||||
else:
|
||||
keys = [ln.strip() for ln in raw.splitlines() if ln.strip() and not ln.startswith("#")]
|
||||
return [str(k) for k in keys]
|
||||
|
||||
|
||||
def keys_to_candidates(keys: list[str]) -> list[tuple[dict, str]]:
    """Turn feature-control keys into ``(entry, body)`` candidate pairs.

    Each key yields one ``feature-flag`` surface whose id is
    ``surface.infotech.feature-control.<slug>``. The slug is the key
    lower-cased with ``.`` and ``_`` replaced by ``-``. The entry's single
    source points at the key via ``openfeature:<key>``.
    """
    hyphenate = str.maketrans({".": "-", "_": "-"})
    candidates: list[tuple[dict, str]] = []
    for flag_key in keys:
        surface_id = "surface.infotech.feature-control." + flag_key.translate(hyphenate).lower()
        scope = {
            "allowed_layers": ["company", "environment", "tenant", "user"],
            "default_layer": "company",
        }
        source = {
            "repo": "feature-control",
            "endpoint": f"openfeature:{flag_key}",
            "role": "feature-control-key",
        }
        # Key order is kept as-is because the entry is dumped unsorted.
        entry = {
            "id": surface_id,
            "name": f"feature flag: {flag_key}",
            "kind": "feature-flag",
            "summary": f"Runtime feature availability controlled by feature-control key `{flag_key}`.",
            "owner": "feature-control",
            "scope": scope,
            "mutability": "hot-reloadable",
            "security_class": "operational",
            "sources": [source],
            "relations": {"related_to": []},
        }
        note = (f"Links to feature-control key `{flag_key}`. config-atlas maps "
                f"the flag; feature-control owns evaluation. Promote or reject.\n")
        candidates.append((entry, note))
    return candidates
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Run the feature-control connector.

    Recognises an optional ``--keys <file>`` pair in *argv*. When the flag is
    absent, or is the last argument with no value, the default key registry
    is used. Returns the exit code produced by ``run_connector``.
    """
    keys_file = None
    try:
        flag_at = argv.index("--keys")
    except ValueError:
        flag_at = None
    if flag_at is not None and flag_at + 1 < len(argv):
        keys_file = argv[flag_at + 1]
    discovered = keys_to_candidates(_load_keys(keys_file))
    return run_connector("feature-control", discovered)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
91
tools/connector_gitconfig.py
Normal file
91
tools/connector_gitconfig.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""git-config deterministic scanner (ATLAS-WP-0003-T03).
|
||||
|
||||
Scan a repository for configuration files and emit candidate surface entries.
|
||||
Records file *locations* and infers kind/scope; it NEVER reads or stores config
|
||||
values, and NEVER reads real secret files (only committed *.example / values /
|
||||
config files). Real `.env` is treated as a secret-bearing source -> a secret-ref
|
||||
candidate with no value.
|
||||
|
||||
Usage:
|
||||
python3 tools/connector_gitconfig.py <repo-slug> [repo-path]
|
||||
make connect-gitconfig REPO=state-hub
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from connector_base import run_connector
|
||||
|
||||
# (glob, kind, role) — order matters; first match wins per file.
|
||||
PATTERNS = [
|
||||
("**/values*.yaml", "deploy-config", "installation-overlay"),
|
||||
("**/values*.yml", "deploy-config", "installation-overlay"),
|
||||
("**/*.env.example", "app-config", "company-baseline"),
|
||||
("**/config*.yaml", "app-config", "company-baseline"),
|
||||
("**/config*.yml", "app-config", "company-baseline"),
|
||||
("**/settings*.yaml", "app-config", "company-baseline"),
|
||||
]
|
||||
SKIP_DIRS = {".git", "node_modules", ".venv", "venv", "__pycache__", "dist", "build"}
|
||||
|
||||
|
||||
def _slugify(rel: str) -> str:
    """Convert a relative path into a lower-case, hyphen-separated slug.

    ``/``, ``.`` and ``_`` all become ``-``; empty segments are dropped, so
    repeated, leading and trailing hyphens never appear in the result.
    """
    hyphenated = rel.translate(str.maketrans("/._", "---")).lower()
    return "-".join(piece for piece in hyphenated.split("-") if piece)
|
||||
|
||||
|
||||
def scan(repo_slug: str, repo_path: Path) -> list[tuple[dict, str]]:
    """Scan *repo_path* for configuration files and build candidate entries.

    Only file locations are recorded; no file is opened. Files matching
    ``PATTERNS`` become candidates of the pattern's kind and role; a real
    ``.env`` becomes a ``secret-ref`` candidate. Anything under a directory
    named in ``SKIP_DIRS`` is ignored. Each surface id is emitted once, so the
    first pattern that matches a file wins.

    Args:
        repo_slug: Repository slug, used in the id, name, owner and source.
        repo_path: Root directory of the checked-out repository.

    Returns:
        A list of ``(entry, body)`` pairs in a stable, sorted order.
    """
    out: list[tuple[dict, str]] = []
    seen: set[str] = set()

    def add(rel: str, kind: str, role: str, secref: bool = False) -> None:
        sid = f"surface.infotech.{repo_slug}.{_slugify(rel)}"
        if sid in seen:
            return
        # Record the id itself: the membership test above is on ids, so
        # storing the path here would make the dedupe a no-op.
        seen.add(sid)
        entry = {
            "id": sid,
            "name": f"{repo_slug}: {rel}",
            "kind": "secret-ref" if secref else kind,
            "summary": f"Configuration surface discovered at {rel} in {repo_slug}.",
            "owner": repo_slug,
            "scope": {"allowed_layers": ["company", "environment", "installation"],
                      "default_layer": "company"},
            "mutability": "deploy-time",
            "security_class": "secret-ref" if secref else "operational",
            "sources": [{"repo": repo_slug, "path": rel, "role": role}],
        }
        body = (f"Discovered by `git-config` scanning `{repo_slug}`. Source: `{rel}`.\n"
                f"No values were read. Review kind/scope/owner and promote or reject.\n")
        out.append((entry, body))

    def relative_config_paths(pattern: str) -> list[Path]:
        """Return sorted repo-relative paths of files matching *pattern*."""
        found = []
        for f in repo_path.glob(pattern):
            if not f.is_file():
                continue
            rel_path = f.relative_to(repo_path)
            # Test only the parts inside the repo, so a checkout that lives
            # under e.g. ~/build/ is not skipped wholesale.
            if any(part in SKIP_DIRS for part in rel_path.parts):
                continue
            found.append(rel_path)
        # Filesystem order is arbitrary; sort so repeated scans agree.
        return sorted(found)

    for glob, kind, role in PATTERNS:
        for rel_path in relative_config_paths(glob):
            # as_posix() keeps ids and recorded paths identical on Windows.
            add(rel_path.as_posix(), kind, role)

    # Real .env => secret-bearing source; record as secret-ref, never read it.
    for rel_path in relative_config_paths("**/.env"):
        add(rel_path.as_posix(), "secret-ref", "company-baseline", secref=True)

    return out
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Run the git-config scanner from command-line arguments.

    ``argv[0]`` is the repo slug; the optional ``argv[1]`` is the repo path,
    defaulting to ``~/<slug>``.

    Returns:
        ``2`` after printing the module usage when no arguments are given,
        ``1`` when the repo path is not a directory, otherwise the exit code
        of ``run_connector``.
    """
    if not argv:
        print(__doc__)
        return 2

    slug, *rest = argv
    repo_dir = Path(rest[0]) if rest else Path.home() / slug
    if repo_dir.is_dir():
        return run_connector("git-config", scan(slug, repo_dir))

    print(f"error: repo path not found: {repo_dir}", file=sys.stderr)
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
101
tools/connector_reposcoping.py
Normal file
101
tools/connector_reposcoping.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
"""repo-scoping fact ingestion connector (ATLAS-WP-0003-T02).
|
||||
|
||||
Consume repo-scoping observed facts/evidence as connector input and emit candidate
|
||||
configuration surfaces, adding only config-kind/layer classification on top
|
||||
(ecosystem-boundaries §2.4 option a). Read-only: zero writes to repo-scoping or the
|
||||
scanned repo.
|
||||
|
||||
Source resolution (first available):
|
||||
1. --facts <file.json> : a repo-scoping facts export (list of fact objects)
|
||||
2. REPO_SCOPING_URL env : GET {url}/repos/{slug}/facts
|
||||
Degrades gracefully (emits nothing) when no source is available.
|
||||
|
||||
Usage:
|
||||
python3 tools/connector_reposcoping.py <repo-slug> [--facts facts.json]
|
||||
make connect-reposcoping REPO=state-hub
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from connector_base import run_connector
|
||||
|
||||
CONFIG_HINTS = ("config", "env", "settings", "values", ".yaml", ".yml", ".toml", ".ini")
|
||||
|
||||
|
||||
def _load_facts(slug: str, facts_file: str | None) -> list[dict]:
|
||||
if facts_file:
|
||||
p = Path(facts_file)
|
||||
if p.exists():
|
||||
data = json.loads(p.read_text())
|
||||
return data if isinstance(data, list) else data.get("facts", [])
|
||||
print(f"repo-scoping: facts file not found: {facts_file}", file=sys.stderr)
|
||||
return []
|
||||
url = os.environ.get("REPO_SCOPING_URL")
|
||||
if url:
|
||||
try:
|
||||
import urllib.request
|
||||
with urllib.request.urlopen(f"{url}/repos/{slug}/facts", timeout=5) as r:
|
||||
data = json.loads(r.read())
|
||||
return data if isinstance(data, list) else data.get("facts", [])
|
||||
except Exception as exc: # noqa: BLE001
|
||||
print(f"repo-scoping: API unavailable ({exc})", file=sys.stderr)
|
||||
return []
|
||||
print("repo-scoping: no --facts file and REPO_SCOPING_URL unset; nothing to ingest")
|
||||
return []
|
||||
|
||||
|
||||
def _is_config_fact(fact: dict) -> bool:
    """Report whether *fact* looks configuration-related.

    The fact's ``path``, ``kind`` and ``summary`` are stringified, joined
    with spaces and lower-cased; the fact qualifies when any entry of
    ``CONFIG_HINTS`` occurs as a substring of that text.
    """
    fields = (fact.get("path", ""), fact.get("kind", ""), fact.get("summary", ""))
    haystack = " ".join(str(value) for value in fields).lower()
    for hint in CONFIG_HINTS:
        if hint in haystack:
            return True
    return False
|
||||
|
||||
|
||||
def facts_to_candidates(slug: str, facts: list[dict]) -> list[tuple[dict, str]]:
    """Turn configuration-related repo-scoping facts into candidate pairs.

    Facts that are not mappings, do not look configuration-related, or have
    no ``path`` are skipped. Every remaining fact yields one ``app-config``
    candidate with id ``surface.infotech.<slug>.<path-slug>``. The path slug
    is lower-cased with ``/``, ``.`` and ``_`` replaced by ``-`` and empty
    segments removed, so a path such as ``config/.env.example`` gives
    ``config-env-example`` rather than ``config--env-example``.

    Args:
        slug: Repository slug the facts belong to.
        facts: Fact objects as returned by repo-scoping.

    Returns:
        A list of ``(entry, body)`` pairs for ``run_connector``.
    """
    out: list[tuple[dict, str]] = []
    for fact in facts:
        # Facts come from an external export/API; ignore malformed items.
        if not isinstance(fact, dict) or not _is_config_fact(fact):
            continue
        rel = str(fact.get("path", "")).strip("/")
        if not rel:
            continue
        raw = rel.replace("/", "-").replace(".", "-").replace("_", "-").lower()
        # Collapse repeated/leading/trailing hyphens so one path maps to one id.
        stem = "-".join(filter(None, raw.split("-")))
        if not stem:
            continue
        sid = f"surface.infotech.{slug}.{stem}"
        entry = {
            "id": sid,
            "name": f"{slug}: {rel}",
            "kind": "app-config",
            "summary": fact.get("summary") or f"Config surface from repo-scoping fact at {rel}.",
            "owner": slug,
            "scope": {"allowed_layers": ["company", "environment", "installation"],
                      "default_layer": "company"},
            "mutability": "deploy-time",
            "security_class": "operational",
            "sources": [{"repo": slug, "path": rel, "role": "company-baseline"}],
            "evidence": {"discovery_method": "connector:repo-scoping",
                         "change_log_ref": str(fact.get("id", ""))},
        }
        out.append((entry, f"Ingested from repo-scoping fact `{fact.get('id','?')}` "
                           f"({rel}). Classify kind/scope and promote or reject.\n"))
    return out
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Run the repo-scoping connector from command-line arguments.

    ``argv[0]`` is the repo slug. An optional ``--facts <file>`` pair names a
    facts export; when the flag has no value it is ignored.

    Returns:
        ``2`` after printing the module usage when no arguments are given,
        otherwise the exit code of ``run_connector``.
    """
    if not argv:
        print(__doc__)
        return 2

    slug = argv[0]
    try:
        flag_at = argv.index("--facts")
    except ValueError:
        flag_at = None
    facts_file = None
    if flag_at is not None and flag_at + 1 < len(argv):
        facts_file = argv[flag_at + 1]

    facts = _load_facts(slug, facts_file)
    return run_connector("repo-scoping", facts_to_candidates(slug, facts))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
84
tools/registry_health.py
Normal file
84
tools/registry_health.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Registry health: stale & unowned surface detection (ATLAS-WP-0003-T05).
|
||||
|
||||
Reports promoted surfaces that need attention:
|
||||
- unowned : missing owner, or owner not resolvable to a known identity
|
||||
- stale : evidence.last_seen older than --max-age-days (default 180), or absent
|
||||
|
||||
Ownership resolution uses reuse-surface's local-repo-roster as a stand-in for
|
||||
domain-tree identities (until domain-tree binding is wired, ATLAS-WP-0002 follow-up).
|
||||
|
||||
Usage:
|
||||
python3 tools/registry_health.py [--max-age-days N] [--strict]
|
||||
make registry-health
|
||||
Exit 0 normally; 1 when --strict and any issue is found.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as _dt
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError as exc: # pragma: no cover
|
||||
raise SystemExit(f"setup error: missing PyYAML ({exc})")
|
||||
|
||||
from effective_config import SURFACES_DIR, load_entry
|
||||
|
||||
ROSTER = Path.home() / "reuse-surface" / "registry" / "federation" / "local-repo-roster.yaml"
|
||||
|
||||
|
||||
def known_owners() -> set[str]:
    """Return the set of owner identities considered resolvable.

    Always contains ``"custodian"``. When the roster file at ``ROSTER``
    exists, the ``slug`` of every entry under its ``repos`` list is added. A
    roster that is empty, is not a mapping, or has a missing or null
    ``repos`` contributes nothing instead of raising.
    """
    owners = {"custodian"}  # State Hub domain identity not in the repo roster
    if not ROSTER.is_file():
        return owners
    data = yaml.safe_load(ROSTER.read_text(encoding="utf-8"))
    repos = data.get("repos") if isinstance(data, dict) else None
    if not isinstance(repos, list):
        # Covers `repos:` left empty (parsed as None) and other odd shapes.
        return owners
    for repo in repos:
        if isinstance(repo, dict) and repo.get("slug"):
            owners.add(repo["slug"])
    return owners
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Report unowned and stale promoted surfaces.

    Recognised arguments:
      --max-age-days N  age in days beyond which ``evidence.last_seen`` is
                        stale (default 180)
      --strict          exit non-zero when any issue is found

    A surface is *unowned* when it has no ``owner`` or the owner is not in
    ``known_owners()``. It is *stale* when ``evidence.last_seen`` is absent,
    unparseable, or older than the cutoff. ``last_seen`` may be an ISO date
    string or a date/datetime value already parsed by the YAML loader.

    Returns:
        ``2`` when ``--max-age-days`` has a missing or non-integer value,
        ``1`` when ``--strict`` is set and an issue was found, otherwise ``0``.
    """
    max_age = 180
    strict = "--strict" in argv
    if "--max-age-days" in argv:
        i = argv.index("--max-age-days")
        try:
            max_age = int(argv[i + 1])
        except (IndexError, ValueError):
            # Report a usage error instead of surfacing a raw traceback.
            print("error: --max-age-days requires an integer value", file=sys.stderr)
            return 2
    cutoff = _dt.date.today() - _dt.timedelta(days=max_age)
    owners = known_owners()

    unowned: list[str] = []
    stale: list[str] = []
    # Glob once so the reported total matches the entries actually checked.
    paths = sorted(SURFACES_DIR.glob("*.md"))
    for p in paths:
        e = load_entry(p)
        sid = e.get("id", p.stem)
        owner = e.get("owner")
        if not owner:
            unowned.append(f"{sid}: missing owner")
        elif owner not in owners:
            unowned.append(f"{sid}: owner '{owner}' not resolvable to a known identity")

        seen = (e.get("evidence", {}) or {}).get("last_seen")
        if not seen:
            stale.append(f"{sid}: no evidence.last_seen")
            continue
        # YAML may already have parsed the value; check datetime first
        # because datetime is a subclass of date.
        if isinstance(seen, _dt.datetime):
            seen_date = seen.date()
        elif isinstance(seen, _dt.date):
            seen_date = seen
        else:
            try:
                seen_date = _dt.date.fromisoformat(str(seen))
            except ValueError:
                stale.append(f"{sid}: unparseable last_seen '{seen}'")
                continue
        if seen_date < cutoff:
            stale.append(f"{sid}: last_seen {seen} older than {max_age}d")

    total = len(paths)
    print(f"registry health: {total} promoted surface(s)")
    print(f" unowned/unresolved: {len(unowned)}")
    for u in unowned:
        print(f" - {u}")
    print(f" stale (> {max_age}d): {len(stale)}")
    for s in stale:
        print(f" - {s}")
    if not unowned and not stale:
        print(" all surfaces owned and fresh.")
    return 1 if (strict and (unowned or stale)) else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
Reference in New Issue
Block a user