from __future__ import annotations import json import subprocess import textwrap from datetime import date from pathlib import Path from typing import Any import yaml from reuse_surface.llm_bridge import request_json_object from reuse_surface.registry import ( entry_vector, load_index_at, parse_front_matter, registry_paths, vectors_match, ) # Safe to apply without interactive review (see patches.SAFE_DETERMINISTIC_KINDS). SAFE_EVIDENCE_PREFIXES = ("tests/", ".gitea/workflows/") def git_changed_files(repo_root: Path, since_ref: str) -> list[str]: result = subprocess.run( ["git", "-C", str(repo_root), "diff", "--name-only", since_ref, "HEAD"], capture_output=True, text=True, check=False, ) if result.returncode != 0: raise ValueError(result.stderr.strip() or f"git diff failed for {since_ref}") return [line.strip() for line in result.stdout.splitlines() if line.strip()] def collect_deterministic_suggestions( repo_root: Path, *, capability_id: str | None = None, git_since: str | None = None, ) -> list[dict[str, Any]]: paths = registry_paths(repo_root) if not paths["index"].exists(): raise ValueError("registry index missing; run establish --scaffold first") index = load_index_at(paths["index"]) rows = index.get("capabilities", []) if capability_id: rows = [row for row in rows if row["id"] == capability_id] if not rows: raise ValueError(f"capability not in index: {capability_id}") changed_files = git_changed_files(repo_root, git_since) if git_since else [] suggestions: list[dict[str, Any]] = [] suggestions.extend(_collect_index_orphans(repo_root, index, changed_files)) for row in rows: entry_path = repo_root / row["path"] if not entry_path.exists(): suggestions.append( { "capability_id": row["id"], "kind": "missing_entry", "detail": f"missing file {row['path']}", } ) continue front_matter = parse_front_matter(entry_path) if not vectors_match(row["vector"], front_matter): suggestions.append( { "capability_id": row["id"], "kind": "vector_drift", "detail": "index vector differs from entry front matter", "index_vector": row["vector"], "entry_vector": entry_vector(front_matter), "apply_patch": { "field": "index.vector", "value": entry_vector(front_matter), }, } ) suggestions.extend( _collect_changed_file_suggestions(row["id"], front_matter, changed_files, repo_root) ) return suggestions def _collect_index_orphans( repo_root: Path, index: dict[str, Any], changed_files: list[str], ) -> list[dict[str, Any]]: suggestions: list[dict[str, Any]] = [] indexed_paths = {row["path"] for row in index.get("capabilities", [])} cap_dir = registry_paths(repo_root)["capabilities"] if not cap_dir.exists(): return suggestions for entry_file in sorted(cap_dir.glob("*.md")): if entry_file.name == ".gitkeep": continue rel = str(entry_file.relative_to(repo_root)) if rel in indexed_paths: continue try: front_matter = parse_front_matter(entry_file) except ValueError: continue cap_id = front_matter.get("id", entry_file.stem.replace("-", ".")) suggestions.append( { "capability_id": cap_id, "kind": "index_row_add", "detail": f"capability file not in index: {rel}", "apply_patch": { "field": "index.capabilities", "index_row": { "id": cap_id, "name": front_matter.get("name", cap_id), "summary": front_matter.get("summary", ""), "vector": entry_vector(front_matter), "domain": front_matter.get("domain", index.get("domain", "helix_forge")), "status": front_matter.get("status", "draft"), "owner": front_matter.get("owner", repo_root.name), "path": rel, "tags": front_matter.get("tags", []), "consumption_modes": front_matter.get("availability", {}).get( "consumption_modes", ["informational"] ), }, }, } ) index_updated = index.get("updated") registry_touched = any(path.startswith("registry/") for path in changed_files) if registry_touched and index_updated != date.today().isoformat(): first_id = index.get("capabilities", [{}])[0].get("id", "registry") suggestions.append( { "capability_id": first_id, "kind": "index_updated_stale", "detail": "registry/ changed; bump index updated date", "apply_patch": {"field": "index.updated", "value": date.today().isoformat()}, } ) return suggestions def _pyproject_script_artifacts(repo_root: Path) -> list[str]: pyproject = repo_root / "pyproject.toml" if not pyproject.exists(): return [] try: import tomllib data = tomllib.loads(pyproject.read_text(encoding="utf-8")) except (OSError, ValueError): return [] scripts = data.get("project", {}).get("scripts", {}) return [f"pyproject.toml:[project.scripts].{name}" for name in sorted(scripts)] def _collect_changed_file_suggestions( cap_id: str, front_matter: dict[str, Any], changed_files: list[str], repo_root: Path, ) -> list[dict[str, Any]]: suggestions: list[dict[str, Any]] = [] evidence = front_matter.setdefault("evidence", {}) evidence_tests = evidence.get("tests", []) evidence_docs = evidence.get("documentation", []) pkg_prefixes = tuple( p.name + "/" for p in repo_root.iterdir() if p.is_dir() and (p / "__init__.py").exists() ) for changed in changed_files: if changed.startswith("tests/") and changed not in evidence_tests: suggestions.append( { "capability_id": cap_id, "kind": "evidence_test", "detail": f"new test file not cited: {changed}", "apply_patch": {"field": "evidence.tests", "append": changed}, } ) if changed.startswith(".gitea/workflows/") and changed.endswith((".yml", ".yaml")): field = "evidence.tests" if "test" in changed.lower() else "evidence.documentation" existing = evidence_tests if field == "evidence.tests" else evidence_docs if changed not in existing: suggestions.append( { "capability_id": cap_id, "kind": "evidence_workflow", "detail": f"workflow changed not cited: {changed}", "apply_patch": {"field": field, "append": changed}, } ) if changed.startswith("docs/") and changed not in evidence_docs: suggestions.append( { "capability_id": cap_id, "kind": "evidence_documentation", "detail": f"doc changed not cited: {changed}", "apply_patch": {"field": "evidence.documentation", "append": changed}, } ) artifacts = front_matter.get("availability", {}).get("current_artifacts", []) for changed in changed_files: if changed.endswith(".py") and changed.startswith(pkg_prefixes): if changed not in artifacts: suggestions.append( { "capability_id": cap_id, "kind": "availability_artifact", "detail": f"changed module not cited: {changed}", "apply_patch": { "field": "availability.current_artifacts", "append": changed, }, } ) if changed == "pyproject.toml": for script_ref in _pyproject_script_artifacts(repo_root): if script_ref not in artifacts: suggestions.append( { "capability_id": cap_id, "kind": "availability_artifact", "detail": f"CLI script not cited: {script_ref}", "apply_patch": { "field": "availability.current_artifacts", "append": script_ref, }, } ) return suggestions def apply_deterministic_suggestions( repo_root: Path, suggestions: list[dict[str, Any]], ) -> list[str]: paths = registry_paths(repo_root) index = load_index_at(paths["index"]) index_by_id = {row["id"]: row for row in index.get("capabilities", [])} changed: list[str] = [] entry_cache: dict[str, dict[str, Any]] = {} entry_paths: dict[str, Path] = {} for suggestion in suggestions: patch = suggestion.get("apply_patch") if not patch: continue cap_id = suggestion["capability_id"] if patch["field"] == "index.vector" and cap_id in index_by_id: index_by_id[cap_id]["vector"] = patch["value"] changed.append(f"index vector for {cap_id}") row = index_by_id.get(cap_id) if not row: continue entry_path = repo_root / row["path"] if cap_id not in entry_cache: entry_cache[cap_id] = parse_front_matter(entry_path) entry_paths[cap_id] = entry_path front_matter = entry_cache[cap_id] if patch["field"] in {"evidence.tests", "evidence.documentation"}: bucket = patch["field"].split(".")[1] items = front_matter.setdefault("evidence", {}).setdefault(bucket, []) if patch["append"] not in items: items.append(patch["append"]) changed.append(f"{cap_id} {patch['field']} += {patch['append']}") if patch["field"] == "availability.current_artifacts": artifacts = front_matter.setdefault("availability", {}).setdefault( "current_artifacts", [] ) if patch["append"] not in artifacts: artifacts.append(patch["append"]) changed.append( f"{cap_id} availability.current_artifacts += {patch['append']}" ) for suggestion in suggestions: patch = suggestion.get("apply_patch") if not patch: continue if suggestion.get("kind") == "index_row_add": cap_id = suggestion["capability_id"] row = patch.get("index_row") if row and cap_id not in index_by_id: index.setdefault("capabilities", []).append(row) changed.append(f"index row added for {cap_id}") if suggestion.get("kind") == "index_updated_stale": index["updated"] = patch.get("value", date.today().isoformat()) changed.append("index.updated bumped") if changed: index["updated"] = date.today().isoformat() paths["index"].write_text( yaml.safe_dump(index, sort_keys=False, allow_unicode=True), encoding="utf-8", ) for cap_id, front_matter in entry_cache.items(): _write_front_matter(entry_paths[cap_id], front_matter) return changed def _write_front_matter(path: Path, front_matter: dict[str, Any]) -> None: text = path.read_text(encoding="utf-8") marker_end = text.find("\n---", 4) body = text[marker_end + 4 :] if marker_end != -1 else "\n" path.write_text( "---\n" + yaml.safe_dump(front_matter, sort_keys=False, allow_unicode=True) + "---" + body, encoding="utf-8", ) def build_update_prompt( repo_root: Path, capability_id: str, *, git_since: str | None = None, ) -> str: paths = registry_paths(repo_root) index = load_index_at(paths["index"]) row = next((item for item in index["capabilities"] if item["id"] == capability_id), None) if not row: raise ValueError(f"capability not in index: {capability_id}") entry = parse_front_matter(repo_root / row["path"]) diff = "" if git_since: proc = subprocess.run( [ "git", "-C", str(repo_root), "diff", git_since, "HEAD", "--", "registry/", "reuse_surface/", "tests/", ], capture_output=True, text=True, check=False, ) diff = proc.stdout[:12000] return textwrap.dedent( f""" Suggest registry entry updates for capability `{capability_id}`. Return ONLY JSON: {{ "promotion_history": [ {{"date": "YYYY-MM-DD", "dimension": "availability", "from": "A3", "to": "A4", "rationale": "..."}} ], "consumer_feedback": ["optional string notes"], "notes": ["human review items"] }} Current entry YAML: {yaml.safe_dump(entry, sort_keys=False)} Git diff since {git_since or 'N/A'}: {diff or '(none)'} """ ).strip() def suggest_llm_updates( repo_root: Path, capability_id: str, *, git_since: str | None = None, llm_url: str | None = None, ) -> dict[str, Any]: prompt = build_update_prompt(repo_root, capability_id, git_since=git_since) return request_json_object( prompt, base_url=llm_url, config={"temperature": 0.2, "max_tokens": 2000}, ) def format_suggestions_markdown(suggestions: list[dict[str, Any]]) -> str: if not suggestions: return "# Registry update suggestions\n\n_No suggestions._\n" lines = ["# Registry update suggestions", ""] for item in suggestions: lines.append(f"- `{item['capability_id']}` **{item['kind']}**: {item['detail']}") lines.append("") lines.append(f"**{len(suggestions)}** suggestion(s). Use `--apply` to apply safe patches.") return "\n".join(lines) + "\n" def format_suggestions_json(suggestions: list[dict[str, Any]]) -> str: return json.dumps({"count": len(suggestions), "suggestions": suggestions}, indent=2)