Files
config-atlas/tools/connector_gitconfig.py
tegwick bc702db4cf
Some checks failed
validate-registry / validate (push) Has been cancelled
feat(connectors): complete ATLAS-WP-0003 — discovery connectors (Phase 2)
T01 connector_base + docs/discovery-connectors.md (read-only/stateless,
candidate->PR->promote; `candidate` added to schema status enum; candidates/
gitignored, excluded from gate).
T02 connector_reposcoping (repo-scoping facts -> candidates; graceful degrade).
T03 connector_gitconfig (deterministic scan; real .env -> secret-ref, no values;
verified 4 real candidates from ~/state-hub).
T04 connector_featurecontrol (feature-flag surfaces linking to feature-control
keys, no eval logic; FR-12).
T05 registry_health (unowned + stale detection).
Make targets: connect-gitconfig/reposcoping/featurecontrol, registry-health.

WP-0003 finished (5/5).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 00:27:57 +02:00

92 lines
3.4 KiB
Python

#!/usr/bin/env python3
"""git-config deterministic scanner (ATLAS-WP-0003-T03).
Scan a repository for configuration files and emit candidate surface entries.
Records file *locations* and infers kind/scope; it NEVER reads or stores config
values, and NEVER reads real secret files (only committed *.example / values /
config files). Real `.env` is treated as a secret-bearing source -> a secret-ref
candidate with no value.
Usage:
python3 tools/connector_gitconfig.py <repo-slug> [repo-path]
make connect-gitconfig REPO=state-hub
"""
from __future__ import annotations
import sys
from pathlib import Path
from connector_base import run_connector
# (glob, kind, role) — order matters; first match wins per file.
# Every YAML family is listed with both the `.yaml` and `.yml` spelling so a
# repository's choice of extension does not hide a config file from the scan.
PATTERNS = [
    ("**/values*.yaml", "deploy-config", "installation-overlay"),
    ("**/values*.yml", "deploy-config", "installation-overlay"),
    ("**/*.env.example", "app-config", "company-baseline"),
    ("**/config*.yaml", "app-config", "company-baseline"),
    ("**/config*.yml", "app-config", "company-baseline"),
    ("**/settings*.yaml", "app-config", "company-baseline"),
    # Appended last so the relative order of the pre-existing patterns is kept.
    ("**/settings*.yml", "app-config", "company-baseline"),
]

# Path components that exclude a file from the scan (VCS metadata, dependency
# trees, virtualenvs, caches, and build output).
SKIP_DIRS = {".git", "node_modules", ".venv", "venv", "__pycache__", "dist", "build"}
def _slugify(rel: str) -> str:
    """Turn a relative path into a lowercase, hyphen-separated slug.

    ``/``, ``.`` and ``_`` are all treated as separators; runs of separators
    collapse to a single hyphen and leading/trailing separators are dropped.
    """
    separators = str.maketrans({"/": "-", ".": "-", "_": "-"})
    pieces = rel.translate(separators).lower().split("-")
    # Empty pieces come from adjacent or edge separators; leave them out.
    return "-".join(piece for piece in pieces if piece)
def scan(repo_slug: str, repo_path: Path) -> list[tuple[dict, str]]:
    """Scan *repo_path* for configuration files and build candidate entries.

    Only file locations are recorded; no file is opened or read.

    Args:
        repo_slug: Repository identifier; used in the surface id, name, owner
            and source record of every candidate.
        repo_path: Root directory of the repository checkout to scan.

    Returns:
        A list of ``(entry, body)`` pairs, one per unique surface id. ``entry``
        is the candidate metadata dict and ``body`` is the review text. Within
        each pattern, matches are emitted in sorted path order.
    """
    out: list[tuple[dict, str]] = []
    seen: set[str] = set()

    def add(rel: str, kind: str, role: str, secref: bool = False) -> None:
        """Append one candidate for *rel* unless its surface id was already emitted."""
        sid = f"surface.infotech.{repo_slug}.{_slugify(rel)}"
        if sid in seen:
            return
        # Record the id itself: the guard above tests ids, so storing the path
        # here would never match and duplicate ids would slip through.
        seen.add(sid)
        entry = {
            "id": sid,
            "name": f"{repo_slug}: {rel}",
            "kind": "secret-ref" if secref else kind,
            "summary": f"Configuration surface discovered at {rel} in {repo_slug}.",
            "owner": repo_slug,
            "scope": {"allowed_layers": ["company", "environment", "installation"],
                      "default_layer": "company"},
            "mutability": "deploy-time",
            "security_class": "secret-ref" if secref else "operational",
            "sources": [{"repo": repo_slug, "path": rel, "role": role}],
        }
        body = (f"Discovered by `git-config` scanning `{repo_slug}`. Source: `{rel}`.\n"
                f"No values were read. Review kind/scope/owner and promote or reject.\n")
        out.append((entry, body))

    def matches(pattern: str) -> list[str]:
        """Return sorted repo-relative POSIX paths of files matching *pattern*."""
        found = []
        for f in repo_path.glob(pattern):
            rel = f.relative_to(repo_path)
            # Test only the components below the repo root; otherwise a
            # checkout living under e.g. ~/build/ would be skipped entirely.
            if f.is_file() and SKIP_DIRS.isdisjoint(rel.parts):
                # as_posix() keeps recorded paths and slugs identical across
                # platforms (no backslashes on Windows).
                found.append(rel.as_posix())
        # Glob order depends on the filesystem; sort for a stable result.
        return sorted(found)

    for pattern, kind, role in PATTERNS:
        for rel in matches(pattern):
            add(rel, kind, role)
    # Real .env => secret-bearing source; record as secret-ref, never read it.
    for rel in matches("**/.env"):
        add(rel, "secret-ref", "company-baseline", secref=True)
    return out
def main(argv: list[str]) -> int:
    """Run the git-config scan from command-line arguments.

    Args:
        argv: Arguments without the program name: ``<repo-slug> [repo-path]``.
            When the path is omitted, ``<home>/<repo-slug>`` is used.

    Returns:
        2 after printing the module docstring when no arguments are given,
        1 when the repo path is not a directory, otherwise the result of
        ``run_connector``.
    """
    if not argv:
        print(__doc__)
        return 2
    repo_slug, *rest = argv
    repo_root = Path(rest[0]) if rest else Path.home() / repo_slug
    if repo_root.is_dir():
        return run_connector("git-config", scan(repo_slug, repo_root))
    print(f"error: repo path not found: {repo_root}", file=sys.stderr)
    return 1
# Script entry point: put this file's directory at the front of sys.path, then
# exit the process with main()'s return value as the status code.
# NOTE(review): `connector_base` is imported at module top, before this insert
# runs, so the insert cannot be what makes that import resolve — confirm
# whether it is still needed.
if __name__ == "__main__":
    sys.path.insert(0, str(Path(__file__).resolve().parent))
    raise SystemExit(main(sys.argv[1:]))