generated from coulomb/repo-seed
Add deterministic repo scanner
This commit is contained in:
@@ -20,6 +20,41 @@ repository, one commit, and one scan profile. It contains:
|
||||
|
||||
The JSON schema lives at `schemas/discovery-snapshot.schema.yaml`.
|
||||
|
||||
## Deterministic Scanner CLI
|
||||
|
||||
The first implementation slice adds an offline deterministic scan command:
|
||||
|
||||
```bash
railiance-fabric scan . \
  --repo-slug railiance-fabric \
  --commit "$(git rev-parse HEAD)" \
  --dry-run \
  --output discovery-snapshot.json
```
|
||||
|
||||
Use `--json` to print the full `FabricDiscoverySnapshot` to stdout. Without
`--json`, the command prints a concise summary of node, edge, attribute, and
replacement-scope counts. The scanner does not call registries, catalogs, or
LLMs in this mode; `--output` is the only write side effect.
|
||||
|
||||
The deterministic extractor framework currently covers:
|
||||
|
||||
- repository metadata from local git/path evidence
- README, INTENT, and SCOPE document presence and headings
- repo-owned Fabric declarations under `fabric/`
- Python `pyproject.toml` package metadata and dependencies
- Node `package.json` package metadata and dependencies
- common lockfiles such as `package-lock.json`, `poetry.lock`, and `uv.lock`
- Dockerfiles and Docker Compose services
- OpenAPI and AsyncAPI contract files
- Score workload files
- Kubernetes-style deployment manifests
- common service config files such as `application.yaml` and
  `appsettings.json`
|
||||
|
||||
Each extractor emits candidates through the same accumulator so stable-key
duplicates merge inside a scan before the snapshot is returned.
|
||||
|
||||
## Identity
|
||||
|
||||
Identity is the main safety boundary. The scanner must not append guesses on
|
||||
|
||||
@@ -13,6 +13,7 @@ from pathlib import Path
|
||||
from .loader import declaration_files, load_yaml
|
||||
from .graph import FabricGraph, build_graph
|
||||
from .graph_explorer import fabric_graph_explorer_payload
|
||||
from .scanner import ScanOptions, scan_repo
|
||||
from .validation import validate_roots
|
||||
|
||||
|
||||
@@ -62,6 +63,17 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
export.add_argument("paths", nargs="*", type=Path, default=[Path(".")])
|
||||
export.add_argument("--format", choices=["json", "mermaid", "graph-explorer"], default="json")
|
||||
|
||||
scan = sub.add_parser("scan", help="Scan a repo for deterministic discovery candidates.")
|
||||
scan.add_argument("path", nargs="?", type=Path, default=Path("."))
|
||||
scan.add_argument("--repo-slug", default=None)
|
||||
scan.add_argument("--repo-name", default=None)
|
||||
scan.add_argument("--domain", default=None)
|
||||
scan.add_argument("--commit", default=None)
|
||||
scan.add_argument("--profile", default="deterministic")
|
||||
scan.add_argument("--dry-run", action="store_true", help="Do not write anywhere except an explicit --output file.")
|
||||
scan.add_argument("--output", type=Path, default=None, help="Write the discovery snapshot JSON to a file.")
|
||||
scan.add_argument("--json", action="store_true", help="Print the discovery snapshot JSON to stdout.")
|
||||
|
||||
registry = sub.add_parser("registry", help="Feed a running Railiance Fabric registry service.")
|
||||
registry_sub = registry.add_subparsers(dest="registry_command", required=True)
|
||||
|
||||
@@ -140,6 +152,9 @@ def main(argv: list[str] | None = None) -> int:
|
||||
print(graph.to_json())
|
||||
return 0
|
||||
|
||||
if args.command == "scan":
|
||||
return _scan_repo(args)
|
||||
|
||||
if args.command == "registry":
|
||||
if args.registry_command == "sync":
|
||||
return _registry_sync(args)
|
||||
@@ -368,6 +383,42 @@ def _registry_ingest_cyclonedx(args: argparse.Namespace) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def _scan_repo(args: argparse.Namespace) -> int:
    """Handle the ``scan`` subcommand: scan a repo and report the snapshot.

    The snapshot returned by ``scan_repo`` is serialized as indented JSON with
    sorted keys. When ``args.output`` is set the JSON (plus a trailing
    newline) is written there, creating parent directories as needed. With
    ``args.json`` the JSON is printed to stdout and nothing else; otherwise a
    one-line summary of candidate counts is printed, followed by a ``wrote``
    notice if a file was written.

    Returns:
        Always ``0``.
    """
    options = ScanOptions(
        repo_path=args.path,
        repo_slug=args.repo_slug,
        repo_name=args.repo_name,
        domain=args.domain,
        commit=args.commit,
        profile=args.profile,
        # Hard-wired: this subcommand never enables LLM-backed extraction.
        deterministic_only=True,
        llm_enabled=False,
    )
    snapshot = scan_repo(options)
    payload = json.dumps(snapshot, indent=2, sort_keys=True)

    destination = args.output
    if destination:
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(payload + "\n", encoding="utf-8")

    if args.json:
        # Keep stdout pure JSON: no summary line and no "wrote" notice.
        print(payload)
        return 0

    candidates = snapshot["candidates"]
    mode = "dry-run " if args.dry_run else ""
    headline = (
        f"{mode}scan {snapshot['source']['repo_slug']} "
        f"({snapshot['source']['commit']}): "
    )
    node_count = len(candidates["nodes"])
    edge_count = len(candidates["edges"])
    attribute_count = len(candidates["attributes"])
    scope_count = len(snapshot["replacement_scopes"])
    print(
        headline
        + f"{node_count} node(s), "
        + f"{edge_count} edge(s), "
        + f"{attribute_count} attribute(s), "
        + f"{scope_count} replacement scope(s)"
    )
    if destination:
        print(f"wrote {destination}")
    return 0
|
||||
|
||||
|
||||
class RegistryRequestError(Exception):
    """Exception type for failed registry requests.

    NOTE(review): the raising and handling sites are outside this view;
    confirm the exact conditions against the callers.
    """
|
||||
|
||||
|
||||
1209
railiance_fabric/scanner.py
Normal file
1209
railiance_fabric/scanner.py
Normal file
File diff suppressed because it is too large
Load Diff
251
tests/test_scanner.py
Normal file
251
tests/test_scanner.py
Normal file
@@ -0,0 +1,251 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from railiance_fabric.cli import main as cli_main
|
||||
from railiance_fabric.scanner import ScanOptions, scan_repo
|
||||
from railiance_fabric.schema_validation import draft202012_validator
|
||||
|
||||
|
||||
def test_scan_repo_emits_schema_valid_deterministic_snapshot(tmp_path: Path) -> None:
    """Scan the fixture repo in-process and check the snapshot's contents.

    Covers schema validity, the source/scan header fields, one representative
    node per extractor, the expected edge types and attribute names,
    uniqueness of stable keys and replacement-scope ids, and the presence of a
    fingerprint on every candidate's first source anchor.
    """
    options = ScanOptions(
        repo_path=_fixture_repo(tmp_path),
        repo_slug="fixture-repo",
        repo_name="Fixture Repo",
        domain="testing",
        commit="abc123",
    )
    snapshot = scan_repo(options)

    _validate_schema("discovery-snapshot.schema.yaml", snapshot)
    assert snapshot["source"]["repo_slug"] == "fixture-repo"
    assert snapshot["source"]["commit"] == "abc123"
    assert snapshot["scan"]["deterministic_only"] is True
    assert snapshot["scan"]["llm_enabled"] is False

    candidates = snapshot["candidates"]
    nodes_by_label = {}
    for node in candidates["nodes"]:
        nodes_by_label[(node["kind"], node["label"])] = node

    expected_review_states = (
        (("Repository", "Fixture Repo"), "candidate"),
        (("ServiceDeclaration", "Fixture API"), "accepted"),
    )
    for node_key, review_state in expected_review_states:
        assert nodes_by_label[node_key]["review_state"] == review_state

    # (kind, label, attribute name, expected attribute value)
    expected_attributes = (
        ("Library", "fixture-service", "language", "python"),
        ("ExternalLibrary", "PyYAML", "ecosystem", "python"),
        ("DeploymentService", "api", "orchestrator", "docker-compose"),
        ("ContainerBuild", "Dockerfile", "base_images", ["python:3.12-slim"]),
        ("InterfaceDeclaration", "Fixture API Contract", "contract_kind", "openapi"),
        ("KubernetesDeployment", "fixture-api", "manifest_kind", "Deployment"),
        ("ScoreWorkload", "fixture-api", "container_count", 1),
        ("Lockfile", "package-lock.json", "path", "package-lock.json"),
        ("ServiceConfig", "application.yaml", "format", "yaml"),
    )
    for kind, label, attribute, expected in expected_attributes:
        assert nodes_by_label[(kind, label)]["attributes"][attribute] == expected

    required_edge_types = {
        "declares_package",
        "depends_on_library",
        "defines_deployment",
        "builds_container",
        "documents_interface",
        "defines_runtime_object",
        "defines_workload",
        "uses_config",
        "provides",
        "exposes",
    }
    edge_types = {edge["edge_type"] for edge in candidates["edges"]}
    assert edge_types >= required_edge_types

    required_attribute_names = {
        "readme_title",
        "intent_present",
        "scope_present",
    }
    attribute_names = {attribute["name"] for attribute in candidates["attributes"]}
    assert attribute_names >= required_attribute_names

    for collection_name in ("nodes", "edges", "attributes"):
        items = candidates[collection_name]
        stable_keys = [item["stable_key"] for item in items]
        assert len(stable_keys) == len(set(stable_keys))
        assert all(item["source_anchors"][0]["fingerprint"] for item in items)

    scopes = snapshot["replacement_scopes"]
    scope_ids = [scope["id"] for scope in scopes]
    assert len(scope_ids) == len(set(scope_ids))
    required_source_kinds = {
        "declaration",
        "package_manifest",
        "lockfile",
        "deployment_manifest",
        "api_contract",
        "service_config",
        "file",
    }
    assert {scope["source_kind"] for scope in scopes} >= required_source_kinds
|
||||
|
||||
|
||||
def test_scan_cli_can_write_snapshot_and_print_summary(tmp_path: Path, capsys) -> None:
    """Run ``scan`` through the CLI entry point with ``--dry-run --output``.

    Checks the exit code, the summary line printed to stdout, and that the
    written file is schema-valid JSON.
    """
    repo = _fixture_repo(tmp_path)
    output = tmp_path / "snapshot.json"

    argv = ["scan", str(repo)]
    argv += ["--repo-slug", "fixture-repo"]
    argv += ["--repo-name", "Fixture Repo"]
    argv += ["--commit", "abc123"]
    argv += ["--dry-run"]
    argv += ["--output", str(output)]
    exit_code = cli_main(argv)
    assert exit_code == 0

    summary = capsys.readouterr().out
    assert "dry-run scan fixture-repo (abc123):" in summary
    assert "replacement scope(s)" in summary

    written = output.read_text(encoding="utf-8")
    payload = json.loads(written)
    _validate_schema("discovery-snapshot.schema.yaml", payload)
|
||||
|
||||
|
||||
def _fixture_repo(tmp_path: Path) -> Path:
    """Create a small repository tree for the scanner tests and return its root.

    The tree lives at ``tmp_path / "fixture-repo"`` and holds one file per
    input the tests assert on: README/INTENT/SCOPE documents, a
    ``pyproject.toml``, a ``package.json`` with lockfile, a Dockerfile, a
    Compose file, an OpenAPI contract, a Score workload, a service config, a
    Kubernetes Deployment manifest, and three Fabric declarations under
    ``fabric/``.

    The YAML documents are written with explicit nesting. The tests look up a
    Compose service named ``api``, a Score workload with one container, and
    declarations identified by their ``metadata`` fields, so the nesting is
    part of the fixture's meaning and must not be flattened.

    Args:
        tmp_path: Directory in which the fixture repository is created.

    Returns:
        Path to the root of the created repository.

    Raises:
        FileExistsError: If ``tmp_path / "fixture-repo"`` already exists.
    """
    repo = tmp_path / "fixture-repo"
    repo.mkdir()

    # Top-level documents.
    _write(repo / "README.md", "# Fixture Repo\n\nRuns the fixture API.\n")
    _write(repo / "INTENT.md", "# Intent\n\nShow deterministic scanner evidence.\n")
    _write(repo / "SCOPE.md", "# Scope\n\nLocal test fixture.\n")

    # Package manifests and lockfile.
    _write(
        repo / "pyproject.toml",
        """
[project]
name = "fixture-service"
version = "0.1.0"
description = "Fixture service"
dependencies = [
"PyYAML>=6.0",
"jsonschema>=4.18",
]
""".lstrip(),
    )
    _write(
        repo / "package.json",
        json.dumps(
            {
                "name": "@fixture/web",
                "version": "0.1.0",
                "private": True,
                "scripts": {"build": "vite build"},
                "dependencies": {"cytoscape": "^3.30.0"},
                "devDependencies": {"vite": "^5.0.0"},
            },
            indent=2,
        ),
    )
    _write(repo / "package-lock.json", '{"lockfileVersion": 3}\n')

    # Container build and Compose service; `api` must nest under `services`.
    _write(repo / "Dockerfile", "FROM python:3.12-slim\nCOPY . /app\n")
    _write(
        repo / "compose.yaml",
        """
services:
  api:
    build: .
    ports:
      - "8080:8080"
""".lstrip(),
    )

    # API contract; the title must nest under `info`.
    _write(
        repo / "openapi.yaml",
        """
openapi: 3.1.0
info:
  title: Fixture API Contract
  version: 0.1.0
paths: {}
""".lstrip(),
    )

    # Score workload with exactly one container.
    _write(
        repo / "score.yaml",
        """
metadata:
  name: fixture-api
containers:
  api:
    image: fixture/api
""".lstrip(),
    )
    _write(
        repo / "application.yaml",
        "server:\n port: 8080\n",
    )

    # Kubernetes-style deployment manifest.
    _write(
        repo / "deploy" / "deployment.yaml",
        """
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fixture-api
spec: {}
""".lstrip(),
    )

    # Repo-owned Fabric declarations: service, capability, interface.
    _write(
        repo / "fabric" / "services" / "fixture-api.yaml",
        """
apiVersion: railiance.fabric/v1alpha1
kind: ServiceDeclaration
metadata:
  id: fixture.api
  name: Fixture API
  owner: test
  repo: fixture-repo
  domain: testing
spec:
  lifecycle: active
  provides_capabilities:
    - fixture.api-capability
  exposes_interfaces:
    - fixture.api-http
""".lstrip(),
    )
    _write(
        repo / "fabric" / "capabilities" / "fixture-api-capability.yaml",
        """
apiVersion: railiance.fabric/v1alpha1
kind: CapabilityDeclaration
metadata:
  id: fixture.api-capability
  name: Fixture API Capability
  owner: test
  repo: fixture-repo
  domain: testing
spec:
  capability_type: fixture-api
  lifecycle: active
  service_id: fixture.api
  interface_ids:
    - fixture.api-http
""".lstrip(),
    )
    _write(
        repo / "fabric" / "interfaces" / "fixture-api-http.yaml",
        """
apiVersion: railiance.fabric/v1alpha1
kind: InterfaceDeclaration
metadata:
  id: fixture.api-http
  name: Fixture API HTTP
  owner: test
  repo: fixture-repo
  domain: testing
spec:
  interface_type: http-api
  lifecycle: active
""".lstrip(),
    )
    return repo
|
||||
|
||||
|
||||
def _write(path: Path, content: str) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(content, encoding="utf-8")
|
||||
|
||||
|
||||
def _validate_schema(schema_name: str, payload: dict[str, object]) -> None:
    """Validate *payload* against a schema file from the repo's ``schemas/`` dir.

    The schema path is anchored to the repository root (the parent of this
    test module's directory) rather than the current working directory, so the
    tests also pass when pytest is launched from somewhere other than the
    repository root.

    Args:
        schema_name: File name of the schema inside ``schemas/``.
        payload: Document to validate.

    Raises:
        Whatever ``validator.validate`` raises for an invalid payload
        (presumably a jsonschema validation error; confirm against
        ``draft202012_validator``).
    """
    repo_root = Path(__file__).resolve().parents[1]
    validator = draft202012_validator(repo_root / "schemas" / schema_name)
    validator.validate(payload)
|
||||
@@ -147,7 +147,7 @@ Acceptance notes:
|
||||
|
||||
```task
|
||||
id: RAIL-FAB-WP-0010-T02
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "5d2ff304-9c79-4699-bf8c-ed6db3a90d9f"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user