Implement REUSE-WP-0013 registry establish, update, and stats

Add the `stats`, `establish` (scaffold, publish-check, discover), and `update` CLI commands with an optional llm-connect bridge, plus `validate --root` for sibling repos, pytest coverage, and documentation for sibling registry onboarding.
2026-06-16 01:21:01 +02:00
parent fb712b4b98
commit 70a5003f6e
19 changed files with 1740 additions and 30 deletions
--- a/tests/test_establish.py
+++ b/tests/test_establish.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import patch
+
+import yaml
+
+from reuse_surface.establish import (
+    discover_capabilities,
+    publish_check,
+    scaffold_registry,
+)
+from reuse_surface.registry import registry_paths
+
+
+def test_scaffold_creates_layout(tmp_path: Path):
+    """Scaffolding creates an index file that is empty and carries the domain."""
+    written = scaffold_registry(tmp_path, domain="helix_forge")
+    index_path = registry_paths(tmp_path)["index"]
+    assert index_path in written
+    index = yaml.safe_load(index_path.read_text(encoding="utf-8"))
+    assert (index["capabilities"], index["domain"]) == ([], "helix_forge")
+
+
+def test_scaffold_refuses_existing_without_force(tmp_path: Path):
+    """A second scaffold_registry call on the same root raises ValueError."""
+    import pytest  # local: this module has no top-level pytest import
+
+    scaffold_registry(tmp_path)
+    with pytest.raises(ValueError, match="already exists"):
+        scaffold_registry(tmp_path)
+
+
+def test_publish_check_local_index(tmp_path: Path):
+    scaffold_registry(tmp_path)  # publish_check runs against a freshly scaffolded tree
+    report = publish_check(tmp_path)
+    assert report["ok"] is True
+    assert any(entry["name"] == "local_index_yaml" for entry in report["checks"])
+
+
+def test_publish_check_raw_url_fail(tmp_path: Path):
+    """A failed raw-URL probe makes publish_check fail and report remediation."""
+    failed_probe = {"ok": False, "status": 303, "content_type": "text/html"}
+    probe_target = "reuse_surface.establish._probe_raw_url"
+    with patch(probe_target, return_value=failed_probe):
+        report = publish_check(
+            tmp_path,
+            raw_url="https://example.com/capabilities.yaml",
+        )
+    assert report["ok"] is False
+    assert report.get("remediation")
+
+
+def test_discover_dry_run_mock_llm(tmp_path: Path):
+    """Dry-run discovery returns the draft supplied by the patched LLM request."""
+    scaffold_registry(tmp_path)
+    readme = tmp_path / "README.md"
+    readme.write_text("# Demo service\n", encoding="utf-8")
+    capability = {
+        "id": "capability.demo.sample",
+        "name": "Sample",
+        "summary": "Sample capability.",
+        "owner": "demo",
+        "vector": "D2 / A0 / C0 / R0",
+        "tags": ["demo"],
+        "consumption_modes": ["informational"],
+        "discovery_intent": "Enable demo planning.",
+    }
+    llm_draft = {
+        "domain": "helix_forge",
+        "capabilities": [capability],
+    }
+    target = "reuse_surface.establish.request_registry_draft"
+    with patch(target, return_value=llm_draft):
+        outcome = discover_capabilities(tmp_path, dry_run=True, apply=False)
+    first = outcome["draft"]["capabilities"][0]
+    assert first["id"] == "capability.demo.sample"
--- a/tests/test_llm_bridge.py
+++ b/tests/test_llm_bridge.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import json
+from unittest.mock import patch
+
+import pytest
+
+from reuse_surface.llm_bridge import (
+    extract_json_object,
+    llm_connect_url,
+    request_registry_draft,
+)
+
+
+def test_extract_json_object_from_fenced_block():
+    fenced = '```json\n{"capabilities": []}\n```'  # JSON inside a Markdown code fence
+    assert extract_json_object(fenced) == {"capabilities": []}
+
+
+def test_llm_connect_url_missing_raises():
+    with pytest.raises(ValueError, match="LLM_CONNECT_URL"):
+        llm_connect_url(None)  # NOTE(review): presumably needs LLM_CONNECT_URL unset
+
+
+def test_request_registry_draft_mock_http():
+    """The draft is the JSON object decoded from the reply's ``content`` field."""
+    capability = {
+        "id": "capability.demo.sample",
+        "name": "Sample",
+        "summary": "Demo capability",
+    }
+    payload = {"content": json.dumps({"capabilities": [capability]})}
+    raw = json.dumps(payload).encode("utf-8")
+
+    class FakeResponse:
+        """Context-manager stand-in for the object returned by urlopen."""
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *exc_info):
+            return False  # do not swallow exceptions raised in the with-body
+
+        def read(self):
+            return raw
+
+    # urlopen is replaced with the fake for the duration of the call.
+    env = {"LLM_CONNECT_URL": "http://llm.test"}
+    with patch.dict("os.environ", env), patch(
+        "urllib.request.urlopen", return_value=FakeResponse()
+    ):
+        draft = request_registry_draft("test prompt")
+    assert draft["capabilities"][0]["id"] == "capability.demo.sample"
--- a/tests/test_registry_update.py
+++ b/tests/test_registry_update.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import yaml
+
+from reuse_surface.establish import scaffold_registry
+from reuse_surface.registry import load_index_at, registry_paths
+from reuse_surface.registry_update import (
+    apply_deterministic_suggestions,
+    collect_deterministic_suggestions,
+)
+
+
+def _write_minimal_entry(tmp_path: Path, cap_id: str, vector: str) -> str:
+    """Write a minimal capability entry under *tmp_path*; return its relative path.
+
+    *vector* must hold four "/"-separated levels (D, A, C, R); the path is fixed.
+    """
+    rel = "registry/capabilities/capability-demo-sample.md"
+    d, a, c, r = (level.strip() for level in vector.split("/"))
+    front_matter = {
+        "id": cap_id,
+        "name": "Sample",
+        "summary": "Sample",
+        "owner": "demo",
+        "status": "draft",
+        "domain": "helix_forge",
+        "tags": ["demo"],
+        "maturity": {
+            "discovery": {"current": d, "target": "D5", "confidence": "low"},
+            "availability": {"current": a, "target": "A3", "confidence": "low"},
+        },
+        "external_evidence": {
+            "completeness": {"level": c, "confidence": "low"},
+            "reliability": {"level": r, "confidence": "low"},
+        },
+        "discovery": {"intent": "demo", "includes": [], "excludes": []},
+        "availability": {
+            "current_level": a,
+            "target_level": "A3",
+            "current_artifacts": [],
+            "consumption_modes": ["informational"],
+        },
+        "relations": {"depends_on": [], "supports": [], "related_to": []},
+        "evidence": {"documentation": [], "tests": []},
+        "consumer_guidance": {
+            "recommended_for": [],
+            "not_recommended_for": [],
+            "known_limitations": [],
+        },
+    }
+    entry_path = tmp_path / rel
+    entry_path.parent.mkdir(parents=True, exist_ok=True)
+    document = "---\n" + yaml.safe_dump(front_matter, sort_keys=False) + "---\n"
+    entry_path.write_text(document, encoding="utf-8")
+    return rel
+
+
+def test_vector_drift_suggestion(tmp_path: Path):
+    """Index/entry vector drift is suggested, and applying it updates the index."""
+    scaffold_registry(tmp_path)
+    cap_id = "capability.demo.sample"
+    rel = _write_minimal_entry(tmp_path, cap_id, "D3 / A0 / C0 / R0")
+    index_path = registry_paths(tmp_path)["index"]
+    index = load_index_at(index_path)
+    stale_row = {
+        "id": cap_id,
+        "name": "Sample",
+        "summary": "Sample",
+        "vector": "D2 / A0 / C0 / R0",
+        "domain": "helix_forge",
+        "status": "draft",
+        "owner": "demo",
+        "path": rel,
+        "tags": ["demo"],
+        "consumption_modes": ["informational"],
+    }
+    index["capabilities"] = [stale_row]
+    index_path.write_text(yaml.safe_dump(index, sort_keys=False), encoding="utf-8")
+
+    found = collect_deterministic_suggestions(tmp_path, capability_id=cap_id)
+    assert any(entry["kind"] == "vector_drift" for entry in found)
+    applied = apply_deterministic_suggestions(tmp_path, found)
+    assert applied
+    reloaded = load_index_at(index_path)
+    assert reloaded["capabilities"][0]["vector"] == "D3 / A0 / C0 / R0"
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from reuse_surface.stats import collect_stats, format_stats_markdown
+
+
+def test_collect_stats_on_repo_root():
+    """Collect stats from the tree two levels above this file; count is pinned at 20."""
+    stats = collect_stats(Path(__file__).resolve().parent.parent)
+    assert stats["capability_count"] == 20
+    assert stats["index_present"] is True
+    assert "discovery" in stats["histograms"]
+
+
+def test_format_stats_markdown_contains_count():
+    """The Markdown report carries the label and the collected capability count."""
+    stats = collect_stats(Path(__file__).resolve().parent.parent)
+    text = format_stats_markdown(stats)
+    assert "Capabilities:" in text and str(stats["capability_count"]) in text