#!/usr/bin/env python3
"""git-config deterministic scanner (ATLAS-WP-0003-T03).

Scan a repository for configuration files and emit candidate surface entries.
Records file *locations* and infers kind/scope; it NEVER reads or stores config
values, and NEVER reads real secret files (only committed *.example / values /
config files). Real `.env` is treated as a secret-bearing source -> a secret-ref
candidate with no value.

Usage:
    python3 tools/connector_gitconfig.py repo-slug [repo-path]
    make connect-gitconfig REPO=state-hub
"""
from __future__ import annotations

import sys
from pathlib import Path

from connector_base import run_connector

# (glob, kind, role) — order matters; first match wins per file.
PATTERNS = [
    ("**/values*.yaml", "deploy-config", "installation-overlay"),
    ("**/values*.yml", "deploy-config", "installation-overlay"),
    ("**/*.env.example", "app-config", "company-baseline"),
    ("**/config*.yaml", "app-config", "company-baseline"),
    ("**/config*.yml", "app-config", "company-baseline"),
    ("**/settings*.yaml", "app-config", "company-baseline"),
]

# Directory names whose contents are never reported.
SKIP_DIRS = {".git", "node_modules", ".venv", "venv", "__pycache__", "dist", "build"}


def _slugify(rel: str) -> str:
    """Turn a repo-relative path into a lowercase, dash-separated slug.

    ``/``, ``.`` and ``_`` become ``-``; runs of dashes are collapsed and
    leading/trailing dashes are dropped (e.g. ``deploy/.env`` -> ``deploy-env``).
    """
    out = rel.replace("/", "-").replace(".", "-").replace("_", "-").lower()
    return "-".join(filter(None, out.split("-")))


def _matching_files(repo_path: Path, pattern: str) -> list[str]:
    """Return repo-relative POSIX paths of regular files matching *pattern*.

    Only path components *below* ``repo_path`` are checked against SKIP_DIRS,
    so a checkout that itself lives under e.g. ``~/build/`` is still scanned.
    Matches are sorted because ``Path.glob`` order is not specified and the
    caller's first-match-wins de-duplication depends on iteration order.
    File contents are never opened.
    """
    found: list[str] = []
    for f in sorted(repo_path.glob(pattern)):
        if not f.is_file():
            continue
        rel = f.relative_to(repo_path)
        if any(part in SKIP_DIRS for part in rel.parts):
            continue
        # as_posix() keeps "/" separators on every platform so that _slugify
        # and the recorded "path" are identical regardless of host OS.
        found.append(rel.as_posix())
    return found


def scan(repo_slug: str, repo_path: Path) -> list[tuple[dict, str]]:
    """Collect candidate surface entries for config files under *repo_path*.

    Args:
        repo_slug: Repository identifier; used in entry ids, names and owner.
        repo_path: Root directory of the repository to scan.

    Returns:
        A list of ``(entry, body)`` tuples, one per unique surface id, where
        ``entry`` is the metadata dict and ``body`` is the review note text.
        Files matched by PATTERNS come first (in PATTERNS order), followed by
        real ``.env`` files recorded as ``secret-ref`` candidates.
    """
    out: list[tuple[dict, str]] = []
    seen: set[str] = set()

    def add(rel: str, kind: str, role: str, secref: bool = False) -> None:
        sid = f"surface.infotech.{repo_slug}.{_slugify(rel)}"
        if sid in seen:
            return
        # Track the surface id (not the path): the membership test above is on
        # ids, and distinct paths can slugify to the same id.
        seen.add(sid)
        entry = {
            "id": sid,
            "name": f"{repo_slug}: {rel}",
            "kind": "secret-ref" if secref else kind,
            "summary": f"Configuration surface discovered at {rel} in {repo_slug}.",
            "owner": repo_slug,
            "scope": {
                "allowed_layers": ["company", "environment", "installation"],
                "default_layer": "company",
            },
            "mutability": "deploy-time",
            "security_class": "secret-ref" if secref else "operational",
            "sources": [{"repo": repo_slug, "path": rel, "role": role}],
        }
        body = (
            f"Discovered by `git-config` scanning `{repo_slug}`. Source: `{rel}`.\n"
            f"No values were read. Review kind/scope/owner and promote or reject.\n"
        )
        out.append((entry, body))

    for glob, kind, role in PATTERNS:
        for rel in _matching_files(repo_path, glob):
            add(rel, kind, role)

    # Real .env => secret-bearing source; record as secret-ref, never read it.
    for rel in _matching_files(repo_path, "**/.env"):
        add(rel, "secret-ref", "company-baseline", secref=True)

    return out


def main(argv: list[str]) -> int:
    """Command-line entry point.

    Args:
        argv: ``[repo-slug]`` or ``[repo-slug, repo-path]``. When the path is
            omitted it defaults to ``~/<repo-slug>``.

    Returns:
        2 (after printing the module docstring) when no arguments are given,
        1 when the repository path is not a directory, otherwise whatever
        ``run_connector`` returns.
    """
    if not argv:
        print(__doc__)
        return 2
    slug = argv[0]
    path = Path(argv[1]) if len(argv) > 1 else Path.home() / slug
    if not path.is_dir():
        print(f"error: repo path not found: {path}", file=sys.stderr)
        return 1
    return run_connector("git-config", scan(slug, path))


if __name__ == "__main__":
    # NOTE(review): the top-level `from connector_base import ...` has already
    # executed by the time this runs, so this insert cannot influence that
    # import — confirm whether it is still needed for later imports.
    sys.path.insert(0, str(Path(__file__).resolve().parent))
    raise SystemExit(main(sys.argv[1:]))